solutions/cw5/fun_parser.sc
changeset 903 2f86ebda3629
parent 894 02ef5c3abc51
child 920 7af2eea19646
--- a/solutions/cw5/fun_parser.sc	Sat Dec 03 21:58:47 2022 +0000
+++ b/solutions/cw5/fun_parser.sc	Fri Dec 09 11:00:05 2022 +0000
@@ -1,215 +1,263 @@
-// A parser for the Fun language
-//================================
-//
-// call with 
-//
-//     amm fun_parser.sc fact.fun
+// Author: Zhuo Ying Jiang Li
+// Starting code by Dr Christian Urban
+
+// parser: convert sequence of tokens to AST
+
 //
-//     amm fun_parser.sc defs.fun
+// Use this command to print parsed AST:
+// amm fun_parser.sc <name>.fun
 //
-// this will generate a parse-tree from a list
-// of tokens
 
-import scala.language.implicitConversions    
-import scala.language.reflectiveCalls
+import $file.fun_tokens, fun_tokens._
 
-import $file.fun_tokens, fun_tokens._ 
-
-
-// Parser combinators
-//    type parameter I needs to be of Seq-type
-//
-abstract class Parser[I, T](implicit ev: I => Seq[_]) {
-  def parse(ts: I): Set[(T, I)]
+// more convenience for the map parsers later on;
+// it allows writing nested patterns as
+// case x ~ y ~ z => ...
+case class ~[+A, +B](x: A, y: B)
 
-  def parse_single(ts: I) : T = 
-    parse(ts).partition(_._2.isEmpty) match {
-      case (good, _) if !good.isEmpty => good.head._1
-      case (good, err) if err.isEmpty => {
-        println (s"Parse Error\n $good \n $err") ; sys.exit(-1) }
-      case (_, err) => { 
-	println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) }
-    }
+// constraint for the input
+type IsSeq[A] = A => Seq[_]
+
+abstract class Parser[I : IsSeq, T]{
+  def parse(in: I): Set[(T, I)]
+
+  def parse_all(in: I) : Set[T] =
+    for ((hd, tl) <- parse(in);
+        if tl.isEmpty) yield hd
 }
 
-// convenience for writing grammar rules
-case class ~[+A, +B](_1: A, _2: B)
+// parser combinators
 
-class SeqParser[I, T, S](p: => Parser[I, T], 
-                         q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] {
-  def parse(sb: I) = 
-    for ((head1, tail1) <- p.parse(sb); 
-         (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
+// sequence parser
+class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
+  def parse(in: I) =
+    for ((hd1, tl1) <- p.parse(in);
+         (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
 }
 
-class AltParser[I, T](p: => Parser[I, T], 
-                      q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
-  def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
+// alternative parser
+class AltParser[I : IsSeq, T](p: => Parser[I, T],
+                              q: => Parser[I, T]) extends Parser[I, T] {
+  def parse(in: I) = p.parse(in) ++ q.parse(in)
 }
 
-class FunParser[I, T, S](p: => Parser[I, T], 
-                         f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
-  def parse(sb: I) = 
-    for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
+// map parser
+class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
+                                 f: T => S) extends Parser[I, S] {
+  def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
 }
 
-// convenient combinators
-implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
-  def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
-  def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
+// more convenient syntax for parser combinators
+implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
+  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
 }
 
-def ListParser[I, T, S](p: => Parser[I, T], 
-                        q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
-  (p ==> ((s) => List(s))) ||
-  (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] }
-}
+// -------------------------------------------------
+// atomic parsers
 
-case class TokParser(tok: Token) extends Parser[List[Token], Token] {
-  def parse(ts: List[Token]) = ts match {
-    case t::ts if (t == tok) => Set((t, ts)) 
+// atomic parser for types
+case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
     case _ => Set()
   }
 }
 
-implicit def token2tparser(t: Token) = TokParser(t)
-
-implicit def TokOps(t: Token) = new {
-  def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
-  def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f)
-  def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
+// atomic parser for global ids
+case object GlobalIdParser extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
+    case _ => Set()
+  }
 }
 
-case object EmptyParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = Set(("", ts))
+// atomic parser for ids
+case object IdParser extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
+    case _ => Set()
+  }
 }
 
-case object NumParser extends Parser[List[Token], Int] {
-  def parse(ts: List[Token]) = ts match {
-    case T_NUM(n)::ts => Set((n, ts)) 
-    case _ => Set ()
+// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
+case object DoubleParser extends Parser[Tokens, Float] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
+    case _ => Set()
   }
 }
 
-case object FNumParser extends Parser[List[Token], Double] {
-  def parse(ts: List[Token]) = ts match {
-    case T_FNUM(x)::ts => Set((x, ts)) 
+// atomic parser for integers
+case object IntParser extends Parser[Tokens, Int] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
+    case _ => Set()
+  }
+}
+
+// atomic parser for operators
+case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
+    case _ => Set()
+  }
+}
+
+// atomic parser for character
+case object CharParser extends Parser[Tokens, Char] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._1 == "ch" => {
+      val stripped = tk._2.slice(1, tk._2.length-1)  // strip off single quotes
+      stripped match {
+        case "\\n" => Set(('\n', tkns))
+        case "\\t" => Set(('\t', tkns))
+        case "\\r" => Set(('\r', tkns))
+        case c => Set((c(0), tkns))
+      }
+    }
     case _ => Set()
   }
 }
 
-case object IdParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = ts match {
-    case T_ID(s)::ts => Set((s, ts)) 
-    case _ => Set ()
+// parser for list of arguments
+def ListParser[I, T, S](p: => Parser[I, T], 
+                        q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
+  (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
+  (p.map((s) => List(s)))
+}
+
+// I may want to write string interpolations for:
+// keywords, semicolon, colon, comma, parentheses
+case class StrParser(s: String) extends Parser[Tokens, String] {
+  def parse(tokens: Tokens) = tokens match {
+    case Nil => Set()
+    case tk::tkns if tk._2 == s => Set((s, tkns))
+    case _ => Set()
   }
 }
 
-case object CharConstParser extends Parser[List[Token], Int] {
-  def parse(ts: List[Token]) = ts match {
-    case T_CHR(c)::ts => Set((c, ts)) 
-    case _ => Set ()
-  }
-}
-
-case object TyParser extends Parser[List[Token], String] {
-  def parse(ts: List[Token]) = ts match {
-    case T_TY(s)::ts => Set((s, ts)) 
-    case _ => Set ()
-  }
+implicit def parser_interpolation(sc: StringContext) = new {
+  def p(args: Any*) = StrParser(sc.s(args:_*))
 }
 
 
-// Abstract syntax trees for the Fun language
-abstract class Exp 
-abstract class BExp 
-abstract class Decl 
+// the AST datastructures for the FUN language
+
+abstract class Exp
+abstract class BExp
+abstract class Decl
 
 case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
 case class Main(e: Exp) extends Decl
 case class Const(name: String, v: Int) extends Decl
-case class FConst(name: String, x: Double) extends Decl
+case class FConst(name: String, x: Float) extends Decl
 
 case class Call(name: String, args: List[Exp]) extends Exp
 case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
 case class Var(s: String) extends Exp
-case class Num(i: Int) extends Exp     // integer numbers
-case class FNum(i: Double) extends Exp  // floating numbers
-case class ChConst(c: Int) extends Exp // char constant
+case class Num(i: Int) extends Exp  // integer numbers
+case class FNum(i: Float) extends Exp  // float numbers
+case class ChConst(c: Int) extends Exp  // character constants
 case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
-case class Sequence(e1: Exp, e2: Exp) extends Exp
+case class Sequence(e1: Exp, e2: Exp) extends Exp  // expressions separated by semicolons
+
 case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
 
 
-// arithmetic expressions (there needs to be an F in the SEMICOLON case)
-lazy val Exp: Parser[List[Token], Exp] = 
-  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
-    { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
-  (F ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || L
-lazy val L: Parser[List[Token], Exp] = 
-  (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } ||
-  (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T  
-lazy val T: Parser[List[Token], Exp] = 
-  (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || 
-  (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || 
-  (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F
-lazy val F: Parser[List[Token], Exp] = 
-  (IdParser ~ T_LPAREN ~ T_RPAREN) ==> { case x ~ _ ~ _ => Call(x, Nil): Exp } ||
-  (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
-  (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || 
-  IdParser ==> { case x => Var(x): Exp } || 
-  NumParser ==> { case x => Num(x): Exp } ||
-  CharConstParser ==> { case x => ChConst(x): Exp } ||
-  FNumParser ==> { case x => FNum(x): Exp }
+lazy val Exps: Parser[Tokens, Exp] =
+  (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
+  Exp
+
+lazy val Exp: Parser[Tokens, Exp] =
+  (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
+  M
+
+lazy val M: Parser[Tokens, Exp] = 
+  (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+  T
+
+lazy val T: Parser[Tokens, Exp] = 
+  (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
+  U
+
+// includes negative factor
+// a + - b CAN be recognised
+// - - - b CAN be recognised
+lazy val U: Parser[Tokens, Exp] =
+  (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
+  (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
+  F
+
+lazy val F: Parser[Tokens, Exp] = 
+  (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
+  (p"skip").map(_ => Call("skip", Nil)) ||  // hardcoded
+  (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) ||  // hardcoded
+  (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
+  (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } ||  // NOTE: empty args are also accepted!
+  (IdParser || GlobalIdParser).map(x => Var(x)) ||
+  IntParser.map(x => Num(x)) ||
+  DoubleParser.map(x => FNum(x)) ||
+  CharParser.map(x => ChConst(x.toInt)) ||
+  (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
 
-// boolean expressions
-lazy val BExp: Parser[List[Token], BExp] = 
-  (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
-  (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
-  (Exp ~ T_OP("<") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  x, z): BExp } || 
-  (Exp ~ T_OP(">") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  z, x): BExp } || 
-  (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || 
-  (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp } ||
-  (T_LPAREN ~ BExp ~ T_RPAREN) ==> { case _ ~ b ~ _ => b : BExp } 
+lazy val BExp: Parser[Tokens, BExp] = 
+  (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
+  (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
+
+lazy val TypedIdParser: Parser[Tokens, (String, String)] =
+  (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
 
-lazy val Arg : Parser[List[Token], (String, String)] = 
-  (IdParser ~ T_COLON ~ TyParser) ==> { case x ~ _ ~ ty => (x, ty) }  
+lazy val Defn: Parser[Tokens, Decl] =
+  (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+    case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
+  } ||
+  (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
+    case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
+  }
 
-lazy val Defn: Parser[List[Token], Decl] = {
-   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
-     { case _ ~ y ~ _ ~ _ ~ _~ ty ~ _ ~ r => Def(y, Nil, ty, r): Decl } ||
-   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(Arg, T_COMMA) ~ T_RPAREN ~ T_COLON ~ TyParser ~ T_OP("=") ~ Exp) ==>
-     { case _ ~ y ~ _ ~ w ~ _ ~ _~ ty ~ _ ~ r => Def(y, w, ty, r): Decl }
-}
+lazy val Constp: Parser[Tokens, Decl] = 
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
+    case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
+  } ||
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
+    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
+  }
 
-lazy val Const_decl: Parser[List[Token], Decl] =
-   (T_KWD("val") ~ Arg ~ T_OP("=") ~ NumParser) ==>
-     { case _ ~ x ~ _ ~ v => Const(x._1, v): Decl } ||
-   (T_KWD("val") ~ Arg ~ T_OP("=") ~ FNumParser) ==>
-     { case _ ~ x ~ _ ~ v => FConst(x._1, v): Decl } 
+// Int can be converted to Double but not viceversa
+lazy val FConstp: Parser[Tokens, Decl] =
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+    case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
+  } ||
+  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
+    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
+  }
 
-lazy val Prog: Parser[List[Token], List[Decl]] =
-  (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
-  (Const_decl ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
-  (Exp ==> ((s) => List(Main(s)) : List[Decl]))
+// Prog consists of global const declarations, f(x) defs, and exp in ANY order
+// restricted to main body at the bottom
+lazy val Prog: Parser[Tokens, List[Decl]] = 
+  (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
+  Exp.map[List[Decl]](s => List(Main(s)))
 
 
+def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
 
-// Reading tokens and Writing parse trees
+import scala.io.Source._
 
-//import ammonite.ops._
-
-def parse_tks(tks: List[Token]) : List[Decl] = {
-  //println(Prog.parse(tks))
-  Prog.parse_single(tks)
+@main
+def parse(filename: String) = {
+  val fun_code = fromFile(filename).getLines.mkString("\n")
+  // print the AST list to screen
+  println(parse_tks(tokenise(fun_code)))
 }
-
-//@doc("Parses a file.")
-@main
-def main(fname: String) : Unit = {
-  val tks = tokenise(os.read(os.pwd / fname))
-  println(parse_tks(tks))
-}
-
-