|      1 // A parser for the Fun language |         | 
|      2 //================================ |         | 
|      3 // |         | 
|      4 // call with  |         | 
|      5 // |         | 
|      6 //     amm fun_parser.sc fact.fun |         | 
|      7 // |         | 
|      8 //     amm fun_parser.sc defs.fun |         | 
|      9 // |         | 
|     10 // this will generate a parse-tree from a list |         | 
|     11 // of tokens |         | 
|     12  |         | 
|     13 import scala.language.implicitConversions     |         | 
|     14 import scala.language.reflectiveCalls |         | 
|     15  |         | 
|     16 import $file.fun_tokens, fun_tokens._  |         | 
|     17  |         | 
|     18  |         | 
|     19 // Parser combinators |         | 
|     20 //    type parameter I needs to be of Seq-type |         | 
|     21 // |         | 
|     22 abstract class Parser[I, T](implicit ev: I => Seq[_]) { |         | 
|     23   def parse(ts: I): Set[(T, I)] |         | 
|     24  |         | 
|     25   def parse_single(ts: I) : T =  |         | 
|     26     parse(ts).partition(_._2.isEmpty) match { |         | 
|     27       case (good, _) if !good.isEmpty => good.head._1 |         | 
|     28       case (good, err) if err.isEmpty => { |         | 
|     29         println (s"Parse Error\n $good \n $err") ; sys.exit(-1) } |         | 
|     30       case (_, err) => {  |         | 
|     31 	println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) } |         | 
|     32     } |         | 
|     33 } |         | 
|     34  |         | 
|     35 // convenience for writing grammar rules |         | 
|     36 case class ~[+A, +B](_1: A, _2: B) |         | 
|     37  |         | 
|     38 class SeqParser[I, T, S](p: => Parser[I, T],  |         | 
|     39                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] { |         | 
|     40   def parse(sb: I) =  |         | 
|     41     for ((head1, tail1) <- p.parse(sb);  |         | 
|     42          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) |         | 
|     43 } |         | 
|     44  |         | 
|     45 class AltParser[I, T](p: => Parser[I, T],  |         | 
|     46                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { |         | 
|     47   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)    |         | 
|     48 } |         | 
|     49  |         | 
|     50 class FunParser[I, T, S](p: => Parser[I, T],  |         | 
|     51                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { |         | 
|     52   def parse(sb: I) =  |         | 
|     53     for ((head, tail) <- p.parse(sb)) yield (f(head), tail) |         | 
|     54 } |         | 
|     55  |         | 
|     56 // convenient combinators |         | 
|     57 implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { |         | 
|     58   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) |         | 
|     59   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) |         | 
|     60   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |         | 
|     61 } |         | 
|     62  |         | 
|     63 def ListParser[I, T, S](p: => Parser[I, T],  |         | 
|     64                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { |         | 
|     65   (p ==> ((s) => List(s))) || |         | 
|     66   (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] } |         | 
|     67 } |         | 
|     68  |         | 
|     69 case class TokParser(tok: Token) extends Parser[List[Token], Token] { |         | 
|     70   def parse(ts: List[Token]) = ts match { |         | 
|     71     case t::ts if (t == tok) => Set((t, ts))  |         | 
|     72     case _ => Set() |         | 
|     73   } |         | 
|     74 } |         | 
|     75  |         | 
|     76 implicit def token2tparser(t: Token) = TokParser(t) |         | 
|     77  |         | 
|     78 implicit def TokOps(t: Token) = new { |         | 
|     79   def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) |         | 
|     80   def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) |         | 
|     81   def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) |         | 
|     82 } |         | 
|     83  |         | 
|     84 case object EmptyParser extends Parser[List[Token], String] { |         | 
|     85   def parse(ts: List[Token]) = Set(("", ts)) |         | 
|     86 } |         | 
|     87  |         | 
|     88 case object NumParser extends Parser[List[Token], Int] { |         | 
|     89   def parse(ts: List[Token]) = ts match { |         | 
|     90     case T_NUM(n)::ts => Set((n, ts))  |         | 
|     91     case _ => Set () |         | 
|     92   } |         | 
|     93 } |         | 
|     94  |         | 
|     95 case object FNumParser extends Parser[List[Token], Double] { |         | 
|     96   def parse(ts: List[Token]) = ts match { |         | 
|     97     case T_FNUM(x)::ts => Set((x, ts))  |         | 
|     98     case _ => Set() |         | 
|     99   } |         | 
|    100 } |         | 
|    101  |         | 
|    102 case object IdParser extends Parser[List[Token], String] { |         | 
|    103   def parse(ts: List[Token]) = ts match { |         | 
|    104     case T_ID(s)::ts => Set((s, ts))  |         | 
|    105     case _ => Set () |         | 
|    106   } |         | 
|    107 } |         | 
|    108  |         | 
|    109 case object CharConstParser extends Parser[List[Token], Int] { |         | 
|    110   def parse(ts: List[Token]) = ts match { |         | 
|    111     case T_CHR(c)::ts => Set((c, ts))  |         | 
|    112     case _ => Set () |         | 
|    113   } |         | 
|    114 } |         | 
|    115  |         | 
|    116 case object TyParser extends Parser[List[Token], String] { |         | 
|    117   def parse(ts: List[Token]) = ts match { |         | 
|    118     case T_TY(s)::ts => Set((s, ts))  |         | 
|    119     case _ => Set () |         | 
|    120   } |         | 
|    121 } |         | 
|    122  |         | 
|    123  |         | 
|    124 // Abstract syntax trees for the Fun language |         | 
|    125 abstract class Exp  |         | 
|    126 abstract class BExp  |         | 
|    127 abstract class Decl  |         | 
|    128  |         | 
|    129 case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl |         | 
|    130 case class Main(e: Exp) extends Decl |         | 
|    131 case class Const(name: String, v: Int) extends Decl |         | 
|    132 case class FConst(name: String, x: Double) extends Decl |         | 
|    133  |         | 
|    134 case class Call(name: String, args: List[Exp]) extends Exp |         | 
|    135 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp |         | 
|    136 case class Var(s: String) extends Exp |         | 
|    137 case class Num(i: Int) extends Exp     // integer numbers |         | 
|    138 case class FNum(i: Double) extends Exp  // floating numbers |         | 
|    139 case class ChConst(c: Int) extends Exp // char constant |         | 
|    140 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp |         | 
|    141 case class Sequence(e1: Exp, e2: Exp) extends Exp |         | 
|    142 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp |         | 
|    143  |         | 
|    144  |         | 
|    145 // arithmetic expressions (there needs to be an F in the SEMICOLON case) |         | 
|    146 lazy val Exp: Parser[List[Token], Exp] =  |         | 
|    147   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> |         | 
|    148     { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } || |         | 
|    149   (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L |         | 
|    150 lazy val L: Parser[List[Token], Exp] =  |         | 
|    151   (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } || |         | 
|    152   (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T   |         | 
|    153 lazy val T: Parser[List[Token], Exp] =  |         | 
|    154   (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } ||  |         | 
|    155   (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } ||  |         | 
|    156   (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F |         | 
|    157 lazy val F: Parser[List[Token], Exp] =  |         | 
|    158   (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } || |         | 
|    159   (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } || |         | 
|    160   (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } ||  |         | 
|    161   IdParser ==> { case x => Var(x): Exp } ||  |         | 
|    162   NumParser ==> { case x => Num(x): Exp } || |         | 
|    163   CharConstParser ==> { case x => ChConst(x): Exp } || |         | 
|    164   FNumParser ==> { case x => FNum(x): Exp } |         | 
|    165  |         | 
|    166 // boolean expressions |         | 
|    167 lazy val BExp: Parser[List[Token], BExp] =  |         | 
|    168   (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } ||  |         | 
|    169   (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } ||  |         | 
|    170   (Exp ~ T_OP("<") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  x, z): BExp } ||  |         | 
|    171   (Exp ~ T_OP(">") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  z, x): BExp } ||  |         | 
|    172   (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } ||  |         | 
|    173   (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } || |         | 
|    174   (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp }  |         | 
|    175  |         | 
|    176 lazy val Arg : Parser[List[Token], (String, String)] =  |         | 
|    177   (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) }   |         | 
|    178  |         | 
|    179 lazy val Defn: Parser[List[Token], Decl] = { |         | 
|    180    (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> |         | 
|    181      { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } || |         | 
|    182    (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> |         | 
|    183      { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl } |         | 
|    184 } |         | 
|    185  |         | 
|    186 lazy val Const_decl: Parser[List[Token], Decl] = |         | 
|    187    (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==> |         | 
|    188      { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } || |         | 
|    189    (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==> |         | 
|    190      { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl }  |         | 
|    191  |         | 
|    192 lazy val Prog: Parser[List[Token], List[Decl]] = |         | 
|    193   (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || |         | 
|    194   (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || |         | 
|    195   (Exp ==> ((s) => List(Main(s)) : List[Decl])) |         | 
|    196  |         | 
|    197  |         | 
|    198  |         | 
|    199 // Reading tokens and Writing parse trees |         | 
|    200  |         | 
|    201 //import ammonite.ops._ |         | 
|    202  |         | 
|    203 def parse_tks(tks: List[Token]) : List[Decl] = { |         | 
|    204   //println(Prog.parse(tks)) |         | 
|    205   Prog.parse_single(tks) |         | 
|    206 } |         | 
|    207  |         | 
|    208 //@doc("Parses a file.") |         | 
|    209 @main |         | 
|    210 def main(fname: String) : Unit = { |         | 
|    211   val tks = tokenise(os.read(os.pwd / fname)) |         | 
|    212   println(parse_tks(tks)) |         | 
|    213 } |         | 
|    214  |         | 
|    215  |         |