progs/funt.scala
changeset 626 2d91b2107656
parent 609 e33545bb2eba
child 628 8067d0a8ba04
equal deleted inserted replaced
625:6709fa87410b 626:2d91b2107656
       
     1 // A Small Compiler for a Simple Functional Language
       
     2 // (includes a lexer and a parser)
       
     3 
     1 import scala.language.implicitConversions    
     4 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls 
     5 import scala.language.reflectiveCalls 
     3 import scala.util._
       
     4 import scala.annotation.tailrec
       
     5 import scala.sys.process._
       
     6 
       
     7 def fromFile(name: String) : String = 
       
     8   io.Source.fromFile(name).mkString
       
     9 
     6 
    10 abstract class Rexp 
     7 abstract class Rexp 
    11 case object NULL extends Rexp
     8 case object ZERO extends Rexp
    12 case object EMPTY extends Rexp
     9 case object ONE extends Rexp
    13 case class CHAR(c: Char) extends Rexp
    10 case class CHAR(c: Char) extends Rexp
    14 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    15 case class RANGE(cs: List[Char]) extends Rexp 
       
    16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    17 case class PLUS(r: Rexp) extends Rexp 
       
    18 case class STAR(r: Rexp) extends Rexp 
    13 case class STAR(r: Rexp) extends Rexp 
    19 case class NTIMES(r: Rexp, n: Int) extends Rexp 
    14 case class RECD(x: String, r: Rexp) extends Rexp
    20 case class NUPTOM(r: Rexp, n: Int, m: Int) extends Rexp
    15   
    21 
    16 abstract class Val
    22 object RANGE {
    17 case object Empty extends Val
    23   def apply(s: String) : RANGE = RANGE(s.toList)
    18 case class Chr(c: Char) extends Val
    24 }
    19 case class Sequ(v1: Val, v2: Val) extends Val
    25 def NMTIMES(r: Rexp, n: Int, m: Int) = {
    20 case class Left(v: Val) extends Val
    26   if (m < n) throw new IllegalArgumentException("the number m cannot be smaller than n.")
    21 case class Right(v: Val) extends Val
    27   else NUPTOM(r, n, m - n)
    22 case class Stars(vs: List[Val]) extends Val
    28 }
    23 case class Rec(x: String, v: Val) extends Val
    29 
    24    
    30 case class NOT(r: Rexp) extends Rexp 
       
    31 case class OPT(r: Rexp) extends Rexp 
       
    32 
       
    33 // some convenience for typing in regular expressions
    25 // some convenience for typing in regular expressions
    34 def charlist2rexp(s : List[Char]) : Rexp = s match {
    26 def charlist2rexp(s : List[Char]): Rexp = s match {
    35   case Nil => EMPTY
    27   case Nil => ONE
    36   case c::Nil => CHAR(c)
    28   case c::Nil => CHAR(c)
    37   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    29   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    38 }
    30 }
    39 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
    31 implicit def string2rexp(s : String) : Rexp = 
    40 
    32   charlist2rexp(s.toList)
    41 implicit def RexpOps (r: Rexp) = new {
    33 
       
    34 implicit def RexpOps(r: Rexp) = new {
    42   def | (s: Rexp) = ALT(r, s)
    35   def | (s: Rexp) = ALT(r, s)
    43   def % = STAR(r)
    36   def % = STAR(r)
    44   def ~ (s: Rexp) = SEQ(r, s)
    37   def ~ (s: Rexp) = SEQ(r, s)
    45 }
    38 }
    46 
    39 
    47 implicit def stringOps (s: String) = new {
    40 implicit def stringOps(s: String) = new {
    48   def | (r: Rexp) = ALT(s, r)
    41   def | (r: Rexp) = ALT(s, r)
    49   def | (r: String) = ALT(s, r)
    42   def | (r: String) = ALT(s, r)
    50   def % = STAR(s)
    43   def % = STAR(s)
    51   def ~ (r: Rexp) = SEQ(s, r)
    44   def ~ (r: Rexp) = SEQ(s, r)
    52   def ~ (r: String) = SEQ(s, r)
    45   def ~ (r: String) = SEQ(s, r)
    53 }
    46   def $ (r: Rexp) = RECD(s, r)
    54 
    47 }
    55 
    48 
    56 // nullable function: tests whether the regular 
       
    57 // expression can recognise the empty string
       
    58 def nullable (r: Rexp) : Boolean = r match {
    49 def nullable (r: Rexp) : Boolean = r match {
    59   case NULL => false
    50   case ZERO => false
    60   case EMPTY => true
    51   case ONE => true
    61   case CHAR(_) => false
    52   case CHAR(_) => false
    62   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    53   case ALT(r1, r2) => nullable(r1) || nullable(r2)
    63   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    54   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    64   case STAR(_) => true
    55   case STAR(_) => true
    65   case PLUS(r) => nullable(r)
    56   case RECD(_, r1) => nullable(r1)
    66   case NTIMES(r, i) => if (i == 0) true else nullable(r)
    57 }
    67   case NUPTOM(r, i, j) => if (i == 0) true else nullable(r)
    58 
    68   case RANGE(_) => false
       
    69   case NOT(r) => !(nullable(r))
       
    70   case OPT(_) => true
       
    71 }
       
    72 
       
    73 // derivative of a regular expression w.r.t. a character
       
    74 def der (c: Char, r: Rexp) : Rexp = r match {
    59 def der (c: Char, r: Rexp) : Rexp = r match {
    75   case NULL => NULL
    60   case ZERO => ZERO
    76   case EMPTY => NULL
    61   case ONE => ZERO
    77   case CHAR(d) => if (c == d) EMPTY else NULL
    62   case CHAR(d) => if (c == d) ONE else ZERO
    78   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    63   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
    79   case SEQ(r1, r2) => 
    64   case SEQ(r1, r2) => 
    80     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    65     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    81     else SEQ(der(c, r1), r2)
    66     else SEQ(der(c, r1), r2)
    82   case STAR(r) => SEQ(der(c, r), STAR(r))
    67   case STAR(r) => SEQ(der(c, r), STAR(r))
    83   case PLUS(r) => SEQ(der(c, r), STAR(r))
    68   case RECD(_, r1) => der(c, r1)
    84   case NTIMES(r, i) => 
    69 }
    85     if (i == 0) NULL else der(c, SEQ(r, NTIMES(r, i - 1)))
    70 
    86   case NUPTOM(r, i, j) =>
    71 
    87     if (i == 0 && j == 0) NULL else 
    72 // extracts a string from value
    88     if (i == 0) ALT(der(c, NTIMES(r, j)), der(c, NUPTOM(r, 0, j - 1)))
    73 def flatten(v: Val) : String = v match {
    89     else der(c, SEQ(r, NUPTOM(r, i - 1, j)))
    74   case Empty => ""
    90   case RANGE(cs) => if (cs contains c) EMPTY else NULL
    75   case Chr(c) => c.toString
    91   case NOT(r) => NOT(der (c, r))
    76   case Left(v) => flatten(v)
    92   case OPT(r) => der(c, r)
    77   case Right(v) => flatten(v)
    93 }
    78   case Sequ(v1, v2) => flatten(v1) + flatten(v2)
    94 
    79   case Stars(vs) => vs.map(flatten).mkString
    95 def zeroable (r: Rexp) : Boolean = r match {
    80   case Rec(_, v) => flatten(v)
    96   case NULL => true
    81 }
    97   case EMPTY => false
    82 
    98   case CHAR(_) => false
    83 // extracts an environment from a value;
    99   case ALT(r1, r2) => zeroable(r1) && zeroable(r2)
    84 // used for tokenise a string
   100   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2)
    85 def env(v: Val) : List[(String, String)] = v match {
   101   case STAR(_) => false
    86   case Empty => Nil
   102   case PLUS(r) => zeroable(r)
    87   case Chr(c) => Nil
   103   case NTIMES(r, i) => if (i == 0) false else zeroable(r)
    88   case Left(v) => env(v)
   104   case NUPTOM(r, i, j) => if (i == 0) false else zeroable(r)
    89   case Right(v) => env(v)
   105   case RANGE(_) => false
    90   case Sequ(v1, v2) => env(v1) ::: env(v2)
   106   case NOT(r) => !(zeroable(r))     // bug: incorrect definition for NOT
    91   case Stars(vs) => vs.flatMap(env)
   107   case OPT(_) => false
    92   case Rec(x, v) => (x, flatten(v))::env(v)
   108 }
    93 }
   109 
    94 
   110 // derivative w.r.t. a string (iterates der)
    95 // The Injection Part of the lexer
   111 def ders (s: List[Char], r: Rexp) : Rexp = s match {
    96 
   112   case Nil => r
    97 def mkeps(r: Rexp) : Val = r match {
   113   case c::s => ders(s, der(c, r))
    98   case ONE => Empty
   114 }
    99   case ALT(r1, r2) => 
   115 
   100     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
   116 
   101   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   117 // regular expressions for the While language
   102   case STAR(r) => Stars(Nil)
   118 val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_".toList)
   103   case RECD(x, r) => Rec(x, mkeps(r))
   119 val DIGIT = RANGE("0123456789".toList)
   104 }
       
   105 
       
   106 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   107   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   108   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   109   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   110   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   111   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   112   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   113   case (CHAR(d), Empty) => Chr(c) 
       
   114   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   115   case _ => { println ("Injection error") ; sys.exit(-1) } 
       
   116 }
       
   117 
       
   118 // some "rectification" functions for simplification
       
   119 def F_ID(v: Val): Val = v
       
   120 def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
       
   121 def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
       
   122 def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   123   case Right(v) => Right(f2(v))
       
   124   case Left(v) => Left(f1(v))
       
   125 }
       
   126 def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
       
   127   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
       
   128 }
       
   129 def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
       
   130   (v:Val) => Sequ(f1(Empty), f2(v))
       
   131 def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
       
   132   (v:Val) => Sequ(f1(v), f2(Empty))
       
   133 def F_RECD(f: Val => Val) = (v:Val) => v match {
       
   134   case Rec(x, v) => Rec(x, f(v))
       
   135 }
       
   136 def F_ERROR(v: Val): Val = throw new Exception("error")
       
   137 
       
   138 def simp(r: Rexp): (Rexp, Val => Val) = r match {
       
   139   case ALT(r1, r2) => {
       
   140     val (r1s, f1s) = simp(r1)
       
   141     val (r2s, f2s) = simp(r2)
       
   142     (r1s, r2s) match {
       
   143       case (ZERO, _) => (r2s, F_RIGHT(f2s))
       
   144       case (_, ZERO) => (r1s, F_LEFT(f1s))
       
   145       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
       
   146                 else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
       
   147     }
       
   148   }
       
   149   case SEQ(r1, r2) => {
       
   150     val (r1s, f1s) = simp(r1)
       
   151     val (r2s, f2s) = simp(r2)
       
   152     (r1s, r2s) match {
       
   153       case (ZERO, _) => (ZERO, F_ERROR)
       
   154       case (_, ZERO) => (ZERO, F_ERROR)
       
   155       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
       
   156       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
       
   157       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
       
   158     }
       
   159   }
       
   160   case RECD(x, r1) => {
       
   161     val (r1s, f1s) = simp(r1)
       
   162     (RECD(x, r1s), F_RECD(f1s))
       
   163   }
       
   164   case r => (r, F_ID)
       
   165 }
       
   166 
       
   167 // lexing functions including simplification
       
   168 def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
       
   169   case Nil => if (nullable(r)) mkeps(r) else { println ("Lexing Error") ; sys.exit(-1) } 
       
   170   case c::cs => {
       
   171     val (r_simp, f_simp) = simp(der(c, r))
       
   172     inj(r, c, f_simp(lex_simp(r_simp, cs)))
       
   173   }
       
   174 }
       
   175 
       
   176 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
       
   177 
       
   178 
       
   179 // The Lexing Rules for the Fun Language
       
   180 
       
   181 def PLUS(r: Rexp) = r ~ r.%
       
   182 
       
   183 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | 
       
   184           "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | 
       
   185           "w" | "x" | "y" | "z" | "T" | "_"
       
   186 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   120 val ID = SYM ~ (SYM | DIGIT).% 
   187 val ID = SYM ~ (SYM | DIGIT).% 
   121 val NUM = PLUS(DIGIT)
   188 val NUM = PLUS(DIGIT)
   122 val KEYWORD : Rexp = "if" | "then" | "else" | "write" | "def"
   189 val KEYWORD : Rexp = "if" | "then" | "else" | "write" | "def"
   123 val SEMI: Rexp = ";"
   190 val SEMI: Rexp = ";"
   124 val COMMA: Rexp = ","
   191 val OP: Rexp = "=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
   125 val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<=" | "=>" | "<" | ">" | "%" | "=" | "/"
       
   126 val WHITESPACE = PLUS(" " | "\n" | "\t")
   192 val WHITESPACE = PLUS(" " | "\n" | "\t")
   127 val RPAREN: Rexp = ")"
   193 val RPAREN: Rexp = ")"
   128 val LPAREN: Rexp = "("
   194 val LPAREN: Rexp = "("
       
   195 val COMMA: Rexp = ","
   129 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   196 val ALL = SYM | DIGIT | OP | " " | ":" | ";" | "\"" | "=" | "," | "(" | ")"
   130 val ALL2 = ALL | "\n"
   197 val ALL2 = ALL | "\n"
   131 //val COMMENT2 = ("/*" ~ NOT(ALL.% ~ "*/" ~ ALL.%) ~ "*/")
       
   132 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   198 val COMMENT = ("/*" ~ ALL2.% ~ "*/") | ("//" ~ ALL.% ~ "\n")
   133 
   199 
   134 
   200 
   135 // token for While language
   201 val WHILE_REGS = (("k" $ KEYWORD) | 
       
   202                   ("i" $ ID) | 
       
   203                   ("o" $ OP) | 
       
   204                   ("n" $ NUM) | 
       
   205                   ("s" $ SEMI) | 
       
   206                   ("c" $ COMMA) |
       
   207                   ("pl" $ LPAREN) |
       
   208                   ("pr" $ RPAREN) |
       
   209                   ("w" $ (WHITESPACE | COMMENT))).%
       
   210 
       
   211 
       
   212 
       
   213 // The tokens for the Fun language
       
   214 
   136 abstract class Token
   215 abstract class Token
   137 case object T_WHITESPACE extends Token
       
   138 case object T_SEMI extends Token
   216 case object T_SEMI extends Token
   139 case object T_COMMA extends Token
   217 case object T_COMMA extends Token
   140 case object T_LPAREN extends Token
   218 case object T_LPAREN extends Token
   141 case object T_RPAREN extends Token
   219 case object T_RPAREN extends Token
   142 case object T_COMMENT extends Token
       
   143 case class T_ID(s: String) extends Token
   220 case class T_ID(s: String) extends Token
   144 case class T_OP(s: String) extends Token
   221 case class T_OP(s: String) extends Token
   145 case class T_NUM(s: String) extends Token
   222 case class T_NUM(n: Int) extends Token
   146 case class T_KWD(s: String) extends Token
   223 case class T_KWD(s: String) extends Token
   147 case class T_ERR(s: String) extends Token // special error token
   224 
   148 
   225 val token : PartialFunction[(String, String), Token] = {
   149 
   226   case ("k", s) => T_KWD(s)
   150 type TokenFun = String => Token
   227   case ("i", s) => T_ID(s)
   151 type LexRules = List[(Rexp, TokenFun)]
   228   case ("o", s) => T_OP(s)
   152 val While_lexing_rules: LexRules = 
   229   case ("n", s) => T_NUM(s.toInt)
   153   List((KEYWORD, (s) => T_KWD(s)),
   230   case ("s", _) => T_SEMI
   154        (ID, (s) => T_ID(s)),
   231   case ("c", _) => T_COMMA
   155        (COMMENT, (s) => T_COMMENT),
   232   case ("pl", _) => T_LPAREN
   156        (OP, (s) => T_OP(s)),
   233   case ("pr", _) => T_RPAREN
   157        (NUM, (s) => T_NUM(s)),
   234 }
   158        (SEMI, (s) => T_SEMI),
   235 
   159        (COMMA, (s) => T_COMMA),
   236 
   160        (LPAREN, (s) => T_LPAREN),
   237 def tokenise(s: String) : List[Token] = 
   161        (RPAREN, (s) => T_RPAREN),
   238   lexing_simp(WHILE_REGS, s).collect(token)
   162        (WHITESPACE, (s) => T_WHITESPACE))
   239 
   163 
   240 
   164 
   241 
   165 // calculates derivatives until all of them are zeroable
   242 // Parser combinators
   166 @tailrec
   243 abstract class Parser[I, T](implicit ev: I => Seq[_]) {
   167 def munch(s: List[Char], 
   244   def parse(ts: I): Set[(T, I)]
   168           pos: Int, 
   245 
   169           rs: LexRules, 
   246   def parse_all(ts: I) : Set[T] =
   170           last: Option[(Int, TokenFun)]): Option[(Int, TokenFun)] = {
   247     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
   171   rs match {
   248 
   172   case Nil => last
   249   def parse_single(ts: I) : T = parse_all(ts).toList match {
   173   case rs if (s.length <= pos) => last
   250     case List(t) => t
   174   case rs => {
   251     case _ => { println ("Parse Error\n") ; sys.exit(-1) }
   175     val ders = rs.map({case (r, tf) => (der(s(pos), r), tf)})
   252   }
   176     val rs_nzero = ders.filterNot({case (r, _) => zeroable(r)})
   253 }
   177     val rs_nulls = ders.filter({case (r, _) => nullable(r)})
   254 
   178     val new_last = if (rs_nulls != Nil) Some((pos, rs_nulls.head._2)) else last
   255 class SeqParser[I, T, S](p: => Parser[I, T], 
   179     munch(s, 1 + pos, rs_nzero, new_last)
   256                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, (T, S)] {
   180   }
   257   def parse(sb: I) = 
   181 }}
   258     for ((head1, tail1) <- p.parse(sb); 
   182 
   259          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
   183 // iterates the munching function and returns a Token list
   260 }
   184 def tokenize(s: String, rs: LexRules) : List[Token] = munch(s.toList, 0, rs, None) match {
   261 
   185   case None if (s == "") => Nil
   262 class AltParser[I, T](p: => Parser[I, T], 
   186   case None => List(T_ERR(s"Lexing error: $s"))
   263                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
   187   case Some((n, tf)) => {
   264   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
   188     val (head, tail) = s.splitAt(n + 1)
   265 }
   189     tf(head)::tokenize(tail, rs)
   266 
   190   }
   267 class FunParser[I, T, S](p: => Parser[I, T], 
   191 }
   268                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
   192 
   269   def parse(sb: I) = 
   193 def tokenizer(s:String) : List[Token] = 
   270     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
   194   tokenize(s, While_lexing_rules).filter {
   271 }
   195     case T_ERR(s) => { println(s); sys.exit(-1) }
   272 
   196     case T_WHITESPACE => false
   273 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
   197     case T_COMMENT => false
   274   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
   198     case _ => true
   275   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
   199   } 
   276   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
   200 
   277 }
   201 
   278 
   202 
   279 def ListParser[I, T, S](p: => Parser[I, T], 
   203 // Parser - Abstract syntax trees
   280                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
       
   281   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   282   (p ==> ((s) => List(s)))
       
   283 }
       
   284 
       
   285 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
       
   286   def parse(ts: List[Token]) = ts match {
       
   287     case t::ts if (t == tok) => Set((t, ts)) 
       
   288     case _ => Set ()
       
   289   }
       
   290 }
       
   291 
       
   292 implicit def token2tparser(t: Token) = TokParser(t)
       
   293 
       
   294 implicit def TokOps(t: Token) = new {
       
   295   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   296   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   297   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   298 }
       
   299 
       
   300 case object NumParser extends Parser[List[Token], Int] {
       
   301   def parse(ts: List[Token]) = ts match {
       
   302     case T_NUM(n)::ts => Set((n, ts)) 
       
   303     case _ => Set ()
       
   304   }
       
   305 }
       
   306 
       
   307 case object IdParser extends Parser[List[Token], String] {
       
   308   def parse(ts: List[Token]) = ts match {
       
   309     case T_ID(s)::ts => Set((s, ts)) 
       
   310     case _ => Set ()
       
   311   }
       
   312 }
       
   313 
       
   314 
       
   315 
       
   316 // Abstract syntax trees for Fun
   204 abstract class Exp
   317 abstract class Exp
   205 abstract class BExp 
   318 abstract class BExp 
   206 abstract class Decl
   319 abstract class Decl
   207 
   320 
   208 case class Def(name: String, args: List[String], body: Exp) extends Decl
   321 case class Def(name: String, args: List[String], body: Exp) extends Decl
   212 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
   325 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
   213 case class Write(e: Exp) extends Exp
   326 case class Write(e: Exp) extends Exp
   214 case class Var(s: String) extends Exp
   327 case class Var(s: String) extends Exp
   215 case class Num(i: Int) extends Exp
   328 case class Num(i: Int) extends Exp
   216 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   329 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
   217 case class Sequ(e1: Exp, e2: Exp) extends Exp
   330 case class Sequence(e1: Exp, e2: Exp) extends Exp
   218 
       
   219 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   331 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
   220 
   332 
   221 // calculating the maximal needed stack size
   333 
   222 def max_stack_exp(e: Exp): Int = e match {
   334 
   223   case Call(_, args) => args.map(max_stack_exp).sum
   335 // Grammar Rules for Fun
   224   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
   336 
   225   case Write(e) => max_stack_exp(e) + 1
   337 // arithmetic expressions
   226   case Var(_) => 1
       
   227   case Num(_) => 1
       
   228   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   229   case Sequ(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
       
   230 }
       
   231 def max_stack_bexp(e: BExp): Int = e match {
       
   232   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   233 }
       
   234 
       
   235 
       
   236 
       
   237 // Parser combinators
       
   238 abstract class Parser[I <% Seq[_], T] {
       
   239   def parse(ts: I): Set[(T, I)]
       
   240 
       
   241   def parse_all(ts: I) : Set[T] =
       
   242     for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
       
   243 
       
   244   def parse_single(ts: I) : T = parse_all(ts).toList match {
       
   245     case List(t) => t
       
   246     case _ => { println ("Parse Error") ; sys.exit(-1) }
       
   247   }
       
   248 }
       
   249 
       
   250 class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
       
   251   def parse(sb: I) = 
       
   252     for ((head1, tail1) <- p.parse(sb); 
       
   253          (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
       
   254 }
       
   255 
       
   256 class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
       
   257   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
       
   258 }
       
   259 
       
   260 class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
       
   261   def parse(sb: I) = 
       
   262     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
       
   263 }
       
   264 
       
   265 case class TokParser(tok: Token) extends Parser[List[Token], Token] {
       
   266   def parse(ts: List[Token]) = ts match {
       
   267     case t::ts if (t == tok) => Set((t, ts)) 
       
   268     case _ => Set ()
       
   269   }
       
   270 }
       
   271 
       
   272 implicit def token2tparser(t: Token) = TokParser(t)
       
   273 
       
   274 case object NumParser extends Parser[List[Token], Int] {
       
   275   def parse(ts: List[Token]) = ts match {
       
   276     case T_NUM(s)::ts => Set((s.toInt, ts)) 
       
   277     case _ => Set ()
       
   278   }
       
   279 }
       
   280 
       
   281 case object IdParser extends Parser[List[Token], String] {
       
   282   def parse(ts: List[Token]) = ts match {
       
   283     case T_ID(s)::ts => Set((s, ts)) 
       
   284     case _ => Set ()
       
   285   }
       
   286 }
       
   287 
       
   288 
       
   289 implicit def ParserOps[I<% Seq[_], T](p: Parser[I, T]) = new {
       
   290   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
   291   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
       
   292   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
   293 }
       
   294 implicit def TokOps(t: Token) = new {
       
   295   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
       
   296   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
       
   297   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
       
   298 }
       
   299 
       
   300 def ListParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]): Parser[I, List[T]] = {
       
   301   (p ~ q ~ ListParser(p, q)) ==> { case ((x, y), z) => x :: z : List[T] } ||
       
   302   (p ==> ((s) => List(s)))
       
   303 }
       
   304 
       
   305 
       
   306 // expressions
       
   307 lazy val Exp: Parser[List[Token], Exp] = 
   338 lazy val Exp: Parser[List[Token], Exp] = 
   308   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   339   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
   309     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   340     { case (((((x, y), z), u), v), w) => If(y, u, w): Exp } ||
   310   (M ~ T_SEMI ~ Exp) ==> { case ((x, y), z) => Sequ(x, z): Exp } || M
   341   (M ~ T_SEMI ~ Exp) ==> { case ((x, y), z) => Sequence(x, z): Exp } || M
   311 lazy val M: Parser[List[Token], Exp] =
   342 lazy val M: Parser[List[Token], Exp] =
   312   (T_KWD("write") ~ L) ==> { case (x, y) => Write(y): Exp } || L
   343   (T_KWD("write") ~ L) ==> { case (x, y) => Write(y): Exp } || L
   313 lazy val L: Parser[List[Token], Exp] = 
   344 lazy val L: Parser[List[Token], Exp] = 
   314   (T ~ T_OP("+") ~ Exp) ==> { case ((x, y), z) => Aop("+", x, z): Exp } ||
   345   (T ~ T_OP("+") ~ Exp) ==> { case ((x, y), z) => Aop("+", x, z): Exp } ||
   315   (T ~ T_OP("-") ~ Exp) ==> { case ((x, y), z) => Aop("-", x, z): Exp } || T  
   346   (T ~ T_OP("-") ~ Exp) ==> { case ((x, y), z) => Aop("-", x, z): Exp } || T  
   339 
   370 
   340 lazy val Prog: Parser[List[Token], List[Decl]] =
   371 lazy val Prog: Parser[List[Token], List[Decl]] =
   341   (Defn ~ T_SEMI ~ Prog) ==> { case ((x, y), z) => x :: z : List[Decl] } ||
   372   (Defn ~ T_SEMI ~ Prog) ==> { case ((x, y), z) => x :: z : List[Decl] } ||
   342   (Exp ==> ((s) => List(Main(s)) : List[Decl]))
   373   (Exp ==> ((s) => List(Main(s)) : List[Decl]))
   343 
   374 
       
   375 
   344 // compiler - built-in functions 
   376 // compiler - built-in functions 
   345 // copied from http://www.ceng.metu.edu.tr/courses/ceng444/link/jvm-cpm.html
   377 // copied from http://www.ceng.metu.edu.tr/courses/ceng444/link/jvm-cpm.html
   346 //
   378 //
   347 
   379 
   348 val library = """
   380 val library = """
   354         invokenonvirtual java/lang/Object/<init>()V
   386         invokenonvirtual java/lang/Object/<init>()V
   355         return
   387         return
   356 .end method
   388 .end method
   357 
   389 
   358 .method public static write(I)V 
   390 .method public static write(I)V 
   359         .limit locals 5 
   391         .limit locals 1 
   360         .limit stack 5 
   392         .limit stack 2 
   361         iload 0 
       
   362         getstatic java/lang/System/out Ljava/io/PrintStream; 
   393         getstatic java/lang/System/out Ljava/io/PrintStream; 
   363         swap 
   394         iload 0
   364         invokevirtual java/io/PrintStream/println(I)V 
   395         invokevirtual java/io/PrintStream/println(I)V 
   365         return 
   396         return 
   366 .end method
   397 .end method
   367 
   398 
   368 """
   399 """
   369 
   400 
       
   401 // calculating the maximal needed stack size
       
   402 def max_stack_exp(e: Exp): Int = e match {
       
   403   case Call(_, args) => args.map(max_stack_exp).sum
       
   404   case If(a, e1, e2) => max_stack_bexp(a) + (List(max_stack_exp(e1), max_stack_exp(e2)).max)
       
   405   case Write(e) => max_stack_exp(e) + 1
       
   406   case Var(_) => 1
       
   407   case Num(_) => 1
       
   408   case Aop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   409   case Sequence(e1, e2) => List(max_stack_exp(e1), max_stack_exp(e2)).max
       
   410 }
       
   411 def max_stack_bexp(e: BExp): Int = e match {
       
   412   case Bop(_, a1, a2) => max_stack_exp(a1) + max_stack_exp(a2)
       
   413 }
       
   414 
       
   415 
   370 // for generating new labels
   416 // for generating new labels
   371 var counter = -1
   417 var counter = -1
   372 
   418 
   373 def Fresh(x: String) = {
   419 def Fresh(x: String) = {
   374   counter += 1
   420   counter += 1
   375   x ++ "_" ++ counter.toString()
   421   x ++ "_" ++ counter.toString()
   376 }
   422 }
   377 
   423 
   378 type Mem = Map[String, Int]
   424 // convenient string interpolations 
   379 type Instrs = List[String]
   425 // for instructions, labels and methods
   380 
   426 import scala.language.implicitConversions
   381 def compile_expT(a: Exp, env : Mem, name: String) : Instrs = a match {
   427 import scala.language.reflectiveCalls
   382   case Num(i) => List("ldc " + i.toString + "\n")
   428 
   383   case Var(s) => List("iload " + env(s).toString + "\n")
   429 implicit def sring_inters(sc: StringContext) = new {
   384   case Aop("+", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("iadd\n")
   430     def i(args: Any*): String = "   " ++ sc.s(args:_*) ++ "\n"
   385   case Aop("-", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("isub\n")
   431     def l(args: Any*): String = sc.s(args:_*) ++ ":\n"
   386   case Aop("*", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("imul\n")
   432     def m(args: Any*): String = sc.s(args:_*) ++ "\n"
   387   case Aop("/", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("idiv\n")
   433 }
   388   case Aop("%", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("irem\n")
   434 
       
   435 
       
   436 type Env = Map[String, Int]
       
   437 
       
   438 
       
   439 def compile_expT(a: Exp, env : Env, name: String) : String = a match {
       
   440   case Num(i) => i"ldc $i"
       
   441   case Var(s) => i"iload ${env(s)}"
       
   442   case Aop("+", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"iadd"
       
   443   case Aop("-", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"isub"
       
   444   case Aop("*", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"imul"
       
   445   case Aop("/", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"idiv"
       
   446   case Aop("%", a1, a2) => compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"irem"
   389   case If(b, a1, a2) => {
   447   case If(b, a1, a2) => {
   390     val if_else = Fresh("If_else")
   448     val if_else = Fresh("If_else")
   391     val if_end = Fresh("If_end")
   449     val if_end = Fresh("If_end")
   392     compile_bexpT(b, env, if_else) ++
   450     compile_bexpT(b, env, if_else) ++
   393     compile_expT(a1, env, name) ++
   451     compile_expT(a1, env, name) ++
   394     List("goto " + if_end + "\n") ++
   452     i"goto $if_end" ++
   395     List("\n" + if_else + ":\n\n") ++
   453     l"$if_else" ++
   396     compile_expT(a2, env, name) ++
   454     compile_expT(a2, env, name) ++
   397     List("\n" + if_end + ":\n\n")
   455     l"$if_end"
   398   }
   456   }
   399   case Call(n, args) => if (name == n) { 
   457   case Call(n, args) => if (name == n) { 
   400     val stores = args.zipWithIndex.map { case (x, y) => "istore " + y.toString + "\n" } 
   458     val stores = args.zipWithIndex.map { case (x, y) => i"istore $y" } 
   401     args.flatMap(a => compile_expT(a, env, "")) ++
   459     args.map(a => compile_expT(a, env, "")).mkString ++
   402     stores.reverse ++ 
   460     stores.reverse.mkString ++ 
   403     List ("goto " + n + "_Start\n") 
   461     i"goto ${n}_Start" 
   404   } else {
   462   } else {
   405     val is = "I" * args.length
   463     val is = "I" * args.length
   406     args.flatMap(a => compile_expT(a, env, "")) ++
   464     args.map(a => compile_expT(a, env, "")).mkString ++
   407     List ("invokestatic XXX/XXX/" + n + "(" + is + ")I\n")
   465     i"invokestatic XXX/XXX/${n}(${is})I"
   408   }
   466   }
   409   case Sequ(a1, a2) => {
   467   case Sequence(a1, a2) => {
   410     compile_expT(a1, env, "") ++ List("pop\n") ++ compile_expT(a2, env, name)
   468     compile_expT(a1, env, "") ++ i"pop" ++ compile_expT(a2, env, name)
   411   }
   469   }
   412   case Write(a1) => {
   470   case Write(a1) => {
   413     compile_expT(a1, env, "") ++
   471     compile_expT(a1, env, "") ++
   414     List("dup\n",
   472     i"dup" ++
   415          "invokestatic XXX/XXX/write(I)V\n")
   473     i"invokestatic XXX/XXX/write(I)V"
   416   }
   474   }
   417 }
   475 }
   418 
   476 
   419 def compile_bexpT(b: BExp, env : Mem, jmp: String) : Instrs = b match {
   477 def compile_bexpT(b: BExp, env : Env, jmp: String) : String = b match {
   420   case Bop("==", a1, a2) => 
   478   case Bop("==", a1, a2) => 
   421     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("if_icmpne " + jmp + "\n")
   479     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"if_icmpne $jmp"
   422   case Bop("!=", a1, a2) => 
   480   case Bop("!=", a1, a2) => 
   423     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("if_icmpeq " + jmp + "\n")
   481     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"if_icmpeq $jmp"
   424   case Bop("<", a1, a2) => 
   482   case Bop("<", a1, a2) => 
   425     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("if_icmpge " + jmp + "\n")
   483     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"if_icmpge $jmp"
   426   case Bop("<=", a1, a2) => 
   484   case Bop("<=", a1, a2) => 
   427     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ List("if_icmpgt " + jmp + "\n")
   485     compile_expT(a1, env, "") ++ compile_expT(a2, env, "") ++ i"if_icmpgt $jmp"
   428 }
   486 }
   429 
   487 
   430 
   488 
   431 def compile_decl(d: Decl) : Instrs = d match {
   489 def compile_decl(d: Decl) : String = d match {
   432   case Def(name, args, a) => { 
   490   case Def(name, args, a) => { 
   433     val env = args.zipWithIndex.toMap
   491     val env = args.zipWithIndex.toMap
   434     val is = "I" * args.length
   492     val is = "I" * args.length
   435     List(".method public static " + name + "(" + is + ")I \n",
   493     m".method public static $name($is)I" ++
   436          ".limit locals " + args.length.toString + "\n",
   494     m".limit locals ${args.length}" ++
   437          ".limit stack " + (1 + max_stack_exp(a)).toString + "\n",
   495     m".limit stack ${1 + max_stack_exp(a)}" ++
   438          name + "_Start:\n") ++   
   496     l"${name}_Start" ++   
   439     compile_expT(a, env, name) ++
   497     compile_expT(a, env, name) ++
   440     List("ireturn\n",
   498     i"ireturn" ++ 
   441          ".end method \n\n")
   499     m".end method\n"
   442   }
   500   }
   443   case Main(a) => {
   501   case Main(a) => {
   444     List(".method public static main([Ljava/lang/String;)V\n",
   502     m".method public static main([Ljava/lang/String;)V" ++
   445          ".limit locals 200\n",
   503     m".limit locals 200" ++
   446          ".limit stack 200\n") ++
   504     m".limit stack 200" ++
   447     compile_expT(a, Map(), "") ++
   505     compile_expT(a, Map(), "") ++
   448     List("invokestatic XXX/XXX/write(I)V\n",
   506     i"invokestatic XXX/XXX/write(I)V" ++
   449          "return\n",
   507     i"return\n" ++
   450          ".end method\n")
   508     m".end method\n"
   451   }
   509   }
   452 }
   510 }
   453 
   511 
   454 def compile(class_name: String, input: String) : String = {
   512 // main compiler functions
   455   val tks = tokenizer(input)
       
   456   //println(Prog.parse(tks))
       
   457   val ast = Prog.parse_single(tks)
       
   458   val instructions = ast.flatMap(compile_decl).mkString
       
   459   (library + instructions).replaceAllLiterally("XXX", class_name)
       
   460 }
       
   461 
       
   462 
       
   463 def compile_file(file_name: String) = {
       
   464   val class_name = file_name.split('.')(0)
       
   465   val output = compile(class_name, fromFile(file_name))
       
   466   val fw = new java.io.FileWriter(class_name + ".j") 
       
   467   fw.write(output) 
       
   468   fw.close()
       
   469 }
       
   470 
   513 
   471 def time_needed[T](i: Int, code: => T) = {
   514 def time_needed[T](i: Int, code: => T) = {
   472   val start = System.nanoTime()
   515   val start = System.nanoTime()
   473   for (j <- 1 to i) code
   516   for (j <- 1 to i) code
   474   val end = System.nanoTime()
   517   val end = System.nanoTime()
   475   (end - start)/(i * 1.0e9)
   518   (end - start)/(i * 1.0e9)
   476 }
   519 }
   477 
   520 
   478 def compile_run(file_name: String) : Unit = {
   521 def compile(class_name: String, input: String) : String = {
   479   val class_name = file_name.split('.')(0)
   522   val tks = tokenise(input)
   480   compile_file(file_name)
   523   val ast = Prog.parse_single(tks)
   481   val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
   524   val instructions = ast.map(compile_decl).mkString
   482   println("Time: " + time_needed(2, ("java " + class_name + "/" + class_name).!))
   525   (library + instructions).replaceAllLiterally("XXX", class_name)
       
   526 }
       
   527 
       
   528 def compile_file(class_name: String) = {
       
   529   val input = io.Source.fromFile(s"${class_name}.fun").mkString
       
   530   val output = compile(class_name, input)
       
   531   scala.tools.nsc.io.File(s"${class_name}.j").writeAll(output)
       
   532 }
       
   533 
       
   534 import scala.sys.process._
       
   535 
       
   536 def compile_run(class_name: String) : Unit = {
       
   537   compile_file(class_name)
       
   538   (s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j").!!
       
   539   println("Time: " + time_needed(2, (s"java ${class_name}/${class_name}").!))
   483 }
   540 }
   484 
   541 
   485 
   542 
   486 //examples
   543 //examples
   487 compile_run("defs.rec")
   544 compile_run("defs")
   488 //compile_run("fact.rec")
   545 compile_run("fact")
   489 
   546 
   490 
   547 
   491 
   548 
   492 
   549 
   493 
   550 
   494 // a problem with the parser 
   551 // a problem with the parser 
   495 /*
   552 /*
   496 val text = "(((((1)))))"
   553 val text = "(((((1)))))"
   497 val tokens = tokenizer(text)
   554 val tokens = tokenise(text)
   498 println(tokens)
   555 println(tokens)
   499 val ast = Prog.parse_single(tokens)
   556 val ast = Prog.parse_single(tokens)
   500 println(ast)
   557 println(ast)
   501 
   558 
   502 Exp.parse_single(tokens)
   559 Exp.parse_single(tokens)
   503 
   560 
   504 
   561 
   505 
   562 
   506 val text = "((((1))))"
   563 val text = "((((1))))"
   507 val tokens = tokenizer(text)
   564 val tokens = tokenise(text)
   508 println(tokens)
   565 println(tokens)
   509 Exp.parse(tokens)
   566 Exp.parse(tokens)
   510 */
   567 */