// Author: Zhuo Ying Jiang Li
// Starting code by Dr Christian Urban
// parser: convert sequence of tokens to AST
//
// Use this command to print parsed AST:
// amm fun_parser.sc <name>.fun
//
import $file.fun_tokens, fun_tokens._
// more convenience for the map parsers later on;
// it allows writing nested patterns as
// case x ~ y ~ z => ...
case class ~[+A, +B](x: A, y: B)
// parser combinators
abstract class Parser[I, T](using is: I => Seq[_]) {
def parse(in: I): Set[(T, I)]
def parse_all(in: I) : Set[T] =
for ((hd, tl) <- parse(in);
if is(tl).isEmpty) yield hd
}
// alternative parser
class AltParser[I, T](p: => Parser[I, T],
q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
def parse(in: I) = p.parse(in) ++ q.parse(in)
}
// sequence parser
class SeqParser[I, T, S](p: => Parser[I, T],
q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
def parse(in: I) =
for ((hd1, tl1) <- p.parse(in);
(hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
}
// map parser
class MapParser[I, T, S](p: => Parser[I, T],
f: T => S)(using I => Seq[_]) extends Parser[I, S] {
def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
}
// more convenient syntax for parser combinators
extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
// -------------------------------------------------
// atomic parsers
// atomic parser for types
case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
case _ => Set()
}
}
// atomic parser for global ids
case object GlobalIdParser extends Parser[Tokens, String] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
case _ => Set()
}
}
// atomic parser for ids
case object IdParser extends Parser[Tokens, String] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
case _ => Set()
}
}
// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
case object DoubleParser extends Parser[Tokens, Float] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
case _ => Set()
}
}
// atomic parser for integers
case object IntParser extends Parser[Tokens, Int] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
case _ => Set()
}
}
// atomic parser for operators
case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
case _ => Set()
}
}
// atomic parser for character
case object CharParser extends Parser[Tokens, Char] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._1 == "ch" => {
val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes
stripped match {
case "\\n" => Set(('\n', tkns))
case "\\t" => Set(('\t', tkns))
case "\\r" => Set(('\r', tkns))
case c => Set((c(0), tkns))
}
}
case _ => Set()
}
}
// parser for list of arguments
def ListParser[I, T, S](p: => Parser[I, T],
q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
(p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
(p.map((s) => List(s)))
}
// I may want to write string interpolations for:
// keywords, semicolon, colon, comma, parentheses
case class StrParser(s: String) extends Parser[Tokens, String] {
def parse(tokens: Tokens) = tokens match {
case Nil => Set()
case tk::tkns if tk._2 == s => Set((s, tkns))
case _ => Set()
}
}
extension (sc: StringContext) {
def p(args: Any*) = StrParser(sc.s(args:_*))
}
// the AST datastructures for the FUN language
abstract class Exp
abstract class BExp
abstract class Decl
case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
case class Main(e: Exp) extends Decl
case class Const(name: String, v: Int) extends Decl
case class FConst(name: String, x: Float) extends Decl
case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Var(s: String) extends Exp
case class Num(i: Int) extends Exp // integer numbers
case class FNum(i: Float) extends Exp // float numbers
case class ChConst(c: Int) extends Exp // character constants
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
lazy val Exps: Parser[Tokens, Exp] =
(Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
Exp
lazy val Exp: Parser[Tokens, Exp] =
(p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
M
lazy val M: Parser[Tokens, Exp] =
(T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
T
lazy val T: Parser[Tokens, Exp] =
(U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
U
// includes negative factor
// a + - b CAN be recognised
// - - - b CAN be recognised
lazy val U: Parser[Tokens, Exp] =
(OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
(OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
F
lazy val F: Parser[Tokens, Exp] =
(p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
(p"skip").map(_ => Call("skip", Nil)) || // hardcoded
(p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded
(IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
(IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted!
(IdParser || GlobalIdParser).map(x => Var(x)) ||
IntParser.map(x => Num(x)) ||
DoubleParser.map(x => FNum(x)) ||
CharParser.map(x => ChConst(x.toInt)) ||
(p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
lazy val BExp: Parser[Tokens, BExp] =
(Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
(p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
lazy val TypedIdParser: Parser[Tokens, (String, String)] =
(IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
lazy val Defn: Parser[Tokens, Decl] =
(p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
} ||
(p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
}
lazy val Constp: Parser[Tokens, Decl] =
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
} ||
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
}
// Int can be converted to Double but not viceversa
lazy val FConstp: Parser[Tokens, Decl] =
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
} ||
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
}
// Prog consists of global const declarations, f(x) defs, and exp in ANY order
// restricted to main body at the bottom
lazy val Prog: Parser[Tokens, List[Decl]] =
(Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
(Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
(FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
Exp.map[List[Decl]](s => List(Main(s)))
def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
import scala.io.Source._
@main
def parse(filename: String) = {
val fun_code = fromFile(filename).getLines.mkString("\n")
// print the AST list to screen
println(parse_tks(tokenise(fun_code)))
}