diff -r b40aaffe0793 -r 2f86ebda3629 solutions/cw5/fun_parser.sc --- a/solutions/cw5/fun_parser.sc Sat Dec 03 21:58:47 2022 +0000 +++ b/solutions/cw5/fun_parser.sc Fri Dec 09 11:00:05 2022 +0000 @@ -1,215 +1,263 @@ -// A parser for the Fun language -//================================ -// -// call with -// -// amm fun_parser.sc fact.fun +// Author: Zhuo Ying Jiang Li +// Starting code by Dr Christian Urban + +// parser: convert sequence of tokens to AST + // -// amm fun_parser.sc defs.fun +// Use this command to print parsed AST: +// amm fun_parser.sc .fun // -// this will generate a parse-tree from a list -// of tokens -import scala.language.implicitConversions -import scala.language.reflectiveCalls +import $file.fun_tokens, fun_tokens._ -import $file.fun_tokens, fun_tokens._ - - -// Parser combinators -// type parameter I needs to be of Seq-type -// -abstract class Parser[I, T](implicit ev: I => Seq[_]) { - def parse(ts: I): Set[(T, I)] +// more convenience for the map parsers later on; +// it allows writing nested patterns as +// case x ~ y ~ z => ... +case class ~[+A, +B](x: A, y: B) - def parse_single(ts: I) : T = - parse(ts).partition(_._2.isEmpty) match { - case (good, _) if !good.isEmpty => good.head._1 - case (good, err) if err.isEmpty => { - println (s"Parse Error\n $good \n $err") ; sys.exit(-1) } - case (_, err) => { - println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) } - } +// constraint for the input +type IsSeq[A] = A => Seq[_] + +abstract class Parser[I : IsSeq, T]{ + def parse(in: I): Set[(T, I)] + + def parse_all(in: I) : Set[T] = + for ((hd, tl) <- parse(in); + if tl.isEmpty) yield hd } -// convenience for writing grammar rules -case class ~[+A, +B](_1: A, _2: B) +// parser combinators -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) +// sequence parser +class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, ~[T, S]] { + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); + (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) } -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) +// alternative parser +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { + def parse(in: I) = p.parse(in) ++ q.parse(in) } -class FunParser[I, T, S](p: => Parser[I, T], - f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +// map parser +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { + def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } -// convenient combinators -implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new { - def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) - def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) +// more convenient syntax for parser combinators +implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { + def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) + def map[S](f: => T => S) = new MapParser[I, T, S](p, f) } -def ListParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { - (p ==> ((s) => List(s))) || - (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] } -} +// ------------------------------------------------- +// atomic parsers -case class TokParser(tok: Token) extends Parser[List[Token], Token] { - def parse(ts: List[Token]) = ts match { - case t::ts if (t == tok) => Set((t, ts)) +// atomic parser for types +case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns)) case _ => Set() } } -implicit def token2tparser(t: Token) = TokParser(t) - -implicit def TokOps(t: Token) = new { - def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) - def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) - def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) +// atomic parser for global ids +case object GlobalIdParser extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "global" => Set((tk._2, tkns)) + case _ => Set() + } } -case object EmptyParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = Set(("", ts)) +// atomic parser for ids +case object IdParser extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "id" => Set((tk._2, tkns)) + case _ => Set() + } } -case object NumParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_NUM(n)::ts => Set((n, ts)) - case _ => Set () +// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5) +case object DoubleParser extends Parser[Tokens, Float] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns)) + case _ => Set() } } -case object FNumParser extends Parser[List[Token], Double] { - def parse(ts: List[Token]) = ts match { - case T_FNUM(x)::ts => Set((x, ts)) +// atomic parser for integers +case object IntParser extends Parser[Tokens, Int] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns)) + case _ => Set() + } +} + +// atomic parser for operators +case class OpParser(ops: Set[String]) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns)) + case _ => Set() + } +} + +// atomic parser for character +case object CharParser extends Parser[Tokens, Char] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._1 == "ch" => { + val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes + stripped match { + case "\\n" => Set(('\n', tkns)) + case "\\t" => Set(('\t', tkns)) + case "\\r" => Set(('\r', tkns)) + case c => Set((c(0), tkns)) + } + } case _ => Set() } } -case object IdParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_ID(s)::ts => Set((s, ts)) - case _ => Set () +// parser for list of arguments +def ListParser[I, T, S](p: => Parser[I, T], + q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = { + (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } || + (p.map((s) => List(s))) +} + +// I may want to write string interpolations for: +// keywords, semicolon, colon, comma, parentheses +case class StrParser(s: String) extends Parser[Tokens, String] { + def parse(tokens: Tokens) = tokens match { + case Nil => Set() + case tk::tkns if tk._2 == s => Set((s, tkns)) + case _ => Set() } } -case object CharConstParser extends Parser[List[Token], Int] { - def parse(ts: List[Token]) = ts match { - case T_CHR(c)::ts => Set((c, ts)) - case _ => Set () - } -} - -case object TyParser extends Parser[List[Token], String] { - def parse(ts: List[Token]) = ts match { - case T_TY(s)::ts => Set((s, ts)) - case _ => Set () - } +implicit def parser_interpolation(sc: StringContext) = new { + def p(args: Any*) = StrParser(sc.s(args:_*)) } -// Abstract syntax trees for the Fun language -abstract class Exp -abstract class BExp -abstract class Decl +// the AST datastructures for the FUN language + +abstract class Exp +abstract class BExp +abstract class Decl case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl case class Main(e: Exp) extends Decl case class Const(name: String, v: Int) extends Decl -case class FConst(name: String, x: Double) extends Decl +case class FConst(name: String, x: Float) extends Decl case class Call(name: String, args: List[Exp]) extends Exp case class If(a: BExp, e1: Exp, e2: Exp) extends Exp case class Var(s: String) extends Exp -case class Num(i: Int) extends Exp // integer numbers -case class FNum(i: Double) extends Exp // floating numbers -case class ChConst(c: Int) extends Exp // char constant +case class Num(i: Int) extends Exp // integer numbers +case class FNum(i: Float) extends Exp // float numbers +case class ChConst(c: Int) extends Exp // character constants case class Aop(o: String, a1: Exp, a2: Exp) extends Exp -case class Sequence(e1: Exp, e2: Exp) extends Exp +case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons + case class Bop(o: String, a1: Exp, a2: Exp) extends BExp -// arithmetic expressions (there needs to be an F in the SEMICOLON case) -lazy val Exp: Parser[List[Token], Exp] = - (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==> - { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } || - (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L -lazy val L: Parser[List[Token], Exp] = - (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } || - (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T -lazy val T: Parser[List[Token], Exp] = - (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || - (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || - (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F -lazy val F: Parser[List[Token], Exp] = - (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } || - (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } || - (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || - IdParser ==> { case x => Var(x): Exp } || - NumParser ==> { case x => Num(x): Exp } || - CharConstParser ==> { case x => ChConst(x): Exp } || - FNumParser ==> { case x => FNum(x): Exp } +lazy val Exps: Parser[Tokens, Exp] = + (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } || + Exp + +lazy val Exp: Parser[Tokens, Exp] = + (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } || + M + +lazy val M: Parser[Tokens, Exp] = + (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } || + T + +lazy val T: Parser[Tokens, Exp] = + (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } || + U + +// includes negative factor +// a + - b CAN be recognised +// - - - b CAN be recognised +lazy val U: Parser[Tokens, Exp] = + (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } || + (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } || + F + +lazy val F: Parser[Tokens, Exp] = + (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } || + (p"skip").map(_ => Call("skip", Nil)) || // hardcoded + (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded + (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } || + (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted! + (IdParser || GlobalIdParser).map(x => Var(x)) || + IntParser.map(x => Num(x)) || + DoubleParser.map(x => FNum(x)) || + CharParser.map(x => ChConst(x.toInt)) || + (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x } -// boolean expressions -lazy val BExp: Parser[List[Token], BExp] = - (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || - (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || - (Exp ~ T_OP("<") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || - (Exp ~ T_OP(">") ~ Exp) ==> { case x ~ _ ~ z => Bop("<", z, x): BExp } || - (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || - (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } || - (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp } +lazy val BExp: Parser[Tokens, BExp] = + (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } || + (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y } + +lazy val TypedIdParser: Parser[Tokens, (String, String)] = + (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) } -lazy val Arg : Parser[List[Token], (String, String)] = - (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) } +lazy val Defn: Parser[Tokens, Decl] = + (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{ + case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b) + } || + (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{ + case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b) + } -lazy val Defn: Parser[List[Token], Decl] = { - (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> - { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } || - (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==> - { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl } -} +lazy val Constp: Parser[Tokens, Decl] = + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp + case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n) + } || + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp + case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n) + } -lazy val Const_decl: Parser[List[Token], Decl] = - (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==> - { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } || - (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==> - { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl } +// Int can be converted to Double but not viceversa +lazy val FConstp: Parser[Tokens, Decl] = + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{ + case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n) + } || + (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{ + case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n) + } -lazy val Prog: Parser[List[Token], List[Decl]] = - (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || - (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } || - (Exp ==> ((s) => List(Main(s)) : List[Decl])) +// Prog consists of global const declarations, f(x) defs, and exp in ANY order +// restricted to main body at the bottom +lazy val Prog: Parser[Tokens, List[Decl]] = + (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } || + Exp.map[List[Decl]](s => List(Main(s))) +def parse_tks(tokens: Tokens) = Prog.parse_all(tokens) -// Reading tokens and Writing parse trees +import scala.io.Source._ -//import ammonite.ops._ - -def parse_tks(tks: List[Token]) : List[Decl] = { - //println(Prog.parse(tks)) - Prog.parse_single(tks) +@main +def parse(filename: String) = { + val fun_code = fromFile(filename).getLines.mkString("\n") + // print the AST list to screen + println(parse_tks(tokenise(fun_code))) } - -//@doc("Parses a file.") -@main -def main(fname: String) : Unit = { - val tks = tokenise(os.read(os.pwd / fname)) - println(parse_tks(tks)) -} - -