--- a/solutions/cw5/fun_parser.sc Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_parser.sc Fri Dec 09 11:00:05 2022 +0000
@@ -1,215 +1,263 @@
-// A parser for the Fun language
-//================================
-//
-// call with
-//
-// amm fun_parser.sc fact.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// parser: converts a sequence of tokens into an AST
+
//
-// amm fun_parser.sc defs.fun
+// Use this command to print the parsed AST:
+// amm fun_parser.sc <name>.fun
//
-// this will generate a parse-tree from a list
-// of tokens
-import scala.language.implicitConversions
-import scala.language.reflectiveCalls
+import $file.fun_tokens, fun_tokens._
-import $file.fun_tokens, fun_tokens._
-
-
-// Parser combinators
-// type parameter I needs to be of Seq-type
-//
-abstract class Parser[I, T](implicit ev: I => Seq[_]) {
- def parse(ts: I): Set[(T, I)]
+// convenience for the map parsers later on;
+// it allows writing nested patterns as
+// case x ~ y ~ z => ...
+case class ~[+A, +B](x: A, y: B)
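+// e.g. the value new ~(new ~(1, "a"), true) is matched by the
+// pattern x ~ y ~ z with x = 1, y = "a", z = true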
- def parse_single(ts: I) : T =
- parse(ts).partition(_._2.isEmpty) match {
- case (good, _) if !good.isEmpty => good.head._1
- case (good, err) if err.isEmpty => {
- println (s"Parse Error\n $good \n $err") ; sys.exit(-1) }
- case (_, err) => {
- println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) }
- }
+// constraint for the input
+type IsSeq[A] = A => Seq[_]
+
+abstract class Parser[I : IsSeq, T]{
+ def parse(in: I): Set[(T, I)]
+
+ def parse_all(in: I) : Set[T] =
+ for ((hd, tl) <- parse(in);
+ if tl.isEmpty) yield hd
}
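+// parse returns every (result, unconsumed-input) pair; parse_all keeps
+// only results that consumed the whole input. Sketch, assuming Tokens
+// is a list of (kind, text) pairs as produced by fun_tokens.sc:
+//   IntParser.parse(List(("int", "42"))) ==> Set((42, Nil))
+//   Prog.parse_all(tokenise("1+2"))      ==> Set(List(Main(Aop("+", Num(1), Num(2)))))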
-// convenience for writing grammar rules
-case class ~[+A, +B](_1: A, _2: B)
+// parser combinators
-class SeqParser[I, T, S](p: => Parser[I, T],
- q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] {
- def parse(sb: I) =
- for ((head1, tail1) <- p.parse(sb);
- (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
+// sequence parser
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
+ q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
+ def parse(in: I) =
+ for ((hd1, tl1) <- p.parse(in);
+ (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
}
-class AltParser[I, T](p: => Parser[I, T],
- q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
- def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
+// alternative parser
+class AltParser[I : IsSeq, T](p: => Parser[I, T],
+ q: => Parser[I, T]) extends Parser[I, T] {
+ def parse(in: I) = p.parse(in) ++ q.parse(in)
}
-class FunParser[I, T, S](p: => Parser[I, T],
- f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
- def parse(sb: I) =
- for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+// map parser
+class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
+ f: T => S) extends Parser[I, S] {
+ def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
}
-// convenient combinators
-implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
- def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
- def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
+// more convenient syntax for parser combinators
+implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+ def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+ def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
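+// with this syntax, grammar rules below can be written in the usual
+// combinator style, e.g.
+//   (p ~ q).map{ case x ~ y => ... } || r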
-def ListParser[I, T, S](p: => Parser[I, T],
- q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
- (p ==> ((s) => List(s))) ||
- (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] }
-}
+// -------------------------------------------------
+// atomic parsers
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
- def parse(ts: List[Token]) = ts match {
- case t::ts if (t == tok) => Set((t, ts))
+// atomic parser for types
+case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
case _ => Set()
}
}
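+// e.g. (assuming (kind, text) token pairs):
+//   TypeParser(Set("Int", "Double")).parse(List(("type", "Int"))) ==> Set(("Int", Nil))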
-implicit def token2tparser(t: Token) = TokParser(t)
-
-implicit def TokOps(t: Token) = new {
- def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
- def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
- def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
+// atomic parser for global ids
+case object GlobalIdParser extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
+ case _ => Set()
+ }
}
-case object EmptyParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = Set(("", ts))
+// atomic parser for ids
+case object IdParser extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
+ case _ => Set()
+ }
}
-case object NumParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_NUM(n)::ts => Set((n, ts))
- case _ => Set ()
+// atomic parser for doubles (Float is used because the CW5 AST stores floating-point values as Float)
+case object DoubleParser extends Parser[Tokens, Float] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
+ case _ => Set()
}
}
-case object FNumParser extends Parser[List[Token], Double] {
- def parse(ts: List[Token]) = ts match {
- case T_FNUM(x)::ts => Set((x, ts))
+// atomic parser for integers
+case object IntParser extends Parser[Tokens, Int] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
+ case _ => Set()
+ }
+}
+
+// atomic parser for operators
+case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
+ case _ => Set()
+ }
+}
+
+// atomic parser for character constants
+case object CharParser extends Parser[Tokens, Char] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._1 == "ch" => {
+ val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes
+ stripped match {
+ case "\\n" => Set(('\n', tkns))
+ case "\\t" => Set(('\t', tkns))
+ case "\\r" => Set(('\r', tkns))
+ case c => Set((c(0), tkns))
+ }
+ }
case _ => Set()
}
}
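+// sketch: the token ("ch", "'a'") yields 'a', and ("ch", "'\n'")
+// yields the newline character after the escape is decoded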
-case object IdParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_ID(s)::ts => Set((s, ts))
- case _ => Set ()
+// parser for non-empty lists (e.g. argument lists), with q as the separator
+def ListParser[I : IsSeq, T, S](p: => Parser[I, T],
+ q: => Parser[I, S]): Parser[I, List[T]] = {
+ (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
+ (p.map((s) => List(s)))
+}
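+// sketch: ListParser(IntParser, p",") applied to the tokens of "1,2,3"
+// returns List(1, 2, 3); at least one element is required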
+
+// string parser, used via the p"..." interpolation below for
+// keywords, semicolons, colons, commas and parentheses
+case class StrParser(s: String) extends Parser[Tokens, String] {
+ def parse(tokens: Tokens) = tokens match {
+ case Nil => Set()
+ case tk::tkns if tk._2 == s => Set((s, tkns))
+ case _ => Set()
}
}
-case object CharConstParser extends Parser[List[Token], Int] {
- def parse(ts: List[Token]) = ts match {
- case T_CHR(c)::ts => Set((c, ts))
- case _ => Set ()
- }
-}
-
-case object TyParser extends Parser[List[Token], String] {
- def parse(ts: List[Token]) = ts match {
- case T_TY(s)::ts => Set((s, ts))
- case _ => Set ()
- }
+implicit def parser_interpolation(sc: StringContext) = new {
+ def p(args: Any*) = StrParser(sc.s(args:_*))
}
-// Abstract syntax trees for the Fun language
-abstract class Exp
-abstract class BExp
-abstract class Decl
+// the AST data structures for the Fun language
+
+abstract class Exp
+abstract class BExp
+abstract class Decl
case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
case class Main(e: Exp) extends Decl
case class Const(name: String, v: Int) extends Decl
-case class FConst(name: String, x: Double) extends Decl
+case class FConst(name: String, x: Float) extends Decl
case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Var(s: String) extends Exp
-case class Num(i: Int) extends Exp // integer numbers
-case class FNum(i: Double) extends Exp // floating numbers
-case class ChConst(c: Int) extends Exp // char constant
+case class Num(i: Int) extends Exp // integer numbers
+case class FNum(i: Float) extends Exp // float numbers
+case class ChConst(c: Int) extends Exp // character constants
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
-case class Sequence(e1: Exp, e2: Exp) extends Exp
+case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons
+
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
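+// e.g. a sequence  write(n); fact(n - 1)  is represented as
+//   Sequence(Call("write", List(Var("n"))),
+//            Call("fact", List(Aop("-", Var("n"), Num(1)))))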
-// arithmetic expressions (there needs to be an F in the SEMICOLON case)
-lazy val Exp: Parser[List[Token], Exp] =
- (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
- { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
- (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L
-lazy val L: Parser[List[Token], Exp] =
- (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } ||
- (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T
-lazy val T: Parser[List[Token], Exp] =
- (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } ||
- (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } ||
- (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F
-lazy val F: Parser[List[Token], Exp] =
- (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } ||
- (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
- (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } ||
- IdParser ==> { case x => Var(x): Exp } ||
- NumParser ==> { case x => Num(x): Exp } ||
- CharConstParser ==> { case x => ChConst(x): Exp } ||
- FNumParser ==> { case x => FNum(x): Exp }
+lazy val Exps: Parser[Tokens, Exp] =
+ (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
+ Exp
+
+lazy val Exp: Parser[Tokens, Exp] =
+ (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
+ M
+
+// note: these right-recursive rules make the binary operators
+// right-associative, e.g. a - b - c parses as a - (b - c)
+lazy val M: Parser[Tokens, Exp] =
+ (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+ T
+
+lazy val T: Parser[Tokens, Exp] =
+ (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+ U
+
+// unary +/- factor:
+// both  a + -b  and  - - -b  are recognised
+lazy val U: Parser[Tokens, Exp] =
+ (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
+ (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
+ F
+
+lazy val F: Parser[Tokens, Exp] =
+ (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
+ (p"skip").map(_ => Call("skip", Nil)) || // hardcoded
+ (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded
+ (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
+ (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted!
+ (IdParser || GlobalIdParser).map(x => Var(x)) ||
+ IntParser.map(x => Num(x)) ||
+ DoubleParser.map(x => FNum(x)) ||
+ CharParser.map(x => ChConst(x.toInt)) ||
+ (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] =
- (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } ||
- (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } ||
- (Exp ~ T_OP("<") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } ||
- (Exp ~ T_OP(">") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", z, x): BExp } ||
- (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } ||
- (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } ||
- (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp }
+lazy val BExp: Parser[Tokens, BExp] =
+ (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
+ (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
+
+lazy val TypedIdParser: Parser[Tokens, (String, String)] =
+ (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
-lazy val Arg : Parser[List[Token], (String, String)] =
- (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) }
+lazy val Defn: Parser[Tokens, Decl] =
+ (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
+ } ||
+ (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+ case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
+ }
-lazy val Defn: Parser[List[Token], Decl] = {
- (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
- { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } ||
- (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
- { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl }
-}
+lazy val Constp: Parser[Tokens, Decl] =
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser rather than Exp: the Const node can only store an Int literal
+ case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
+ } ||
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // also allow negated int literals
+ case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
+ }
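+// sketch: "val Max : Int = 10" yields Const("Max", 10), assuming the
+// tokeniser classifies capitalised names as "global" tokens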
-lazy val Const_decl: Parser[List[Token], Decl] =
- (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==>
- { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } ||
- (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==>
- { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl }
+// an Int literal can be widened to Double, but not vice versa
+lazy val FConstp: Parser[Tokens, Decl] =
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+ case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
+ } ||
+ (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+ case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
+ }
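+// sketch (same assumption about "global" tokens): "val Ymax : Double = 16"
+// is accepted and yields FConst("Ymax", 16.0f) via the IntParser fallback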
-lazy val Prog: Parser[List[Token], List[Decl]] =
- (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
- (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
- (Exp ==> ((s) => List(Main(s)) : List[Decl]))
+// a program consists of global constant declarations, function
+// definitions and one main expression, in any order, except that
+// the main expression must come last
+lazy val Prog: Parser[Tokens, List[Decl]] =
+ (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+ Exp.map[List[Decl]](s => List(Main(s)))
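+// sketch, assuming the usual token classification: the tokens of
+// "def f(x: Int): Int = x; f(10)" parse to
+//   List(Def("f", List(("x", "Int")), "Int", Var("x")),
+//        Main(Call("f", List(Num(10)))))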
+def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
-// Reading tokens and Writing parse trees
+import scala.io.Source._
-//import ammonite.ops._
-
-def parse_tks(tks: List[Token]) : List[Decl] = {
- //println(Prog.parse(tks))
- Prog.parse_single(tks)
+@main
+def parse(filename: String) = {
+ val fun_code = fromFile(filename).getLines.mkString("\n")
+ // print the AST list to screen
+ println(parse_tks(tokenise(fun_code)))
}
-
-//@doc("Parses a file.")
-@main
-def main(fname: String) : Unit = {
- val tks = tokenise(os.read(os.pwd / fname))
- println(parse_tks(tks))
-}
-
-