solutions/cw5/fun_parser.sc
author Christian Urban <christian.urban@kcl.ac.uk>
Tue, 19 Sep 2023 09:54:41 +0100
changeset 920 7af2eea19646
parent 903 2f86ebda3629
permissions -rw-r--r--
updated

// Author: Zhuo Ying Jiang Li
// Starting code by Dr Christian Urban

// parser: convert sequence of tokens to AST

//
// Use this command to print parsed AST:
// amm fun_parser.sc <name>.fun
//

import $file.fun_tokens, fun_tokens._

// more convenience for the map parsers later on;
// it allows writing nested patterns as
// case x ~ y ~ z => ...
case class ~[+A, +B](x: A, y: B)

// parser combinators
abstract class Parser[I, T](using is: I => Seq[_])  {
  def parse(in: I): Set[(T, I)]  

  def parse_all(in: I) : Set[T] =
    for ((hd, tl) <- parse(in); 
        if is(tl).isEmpty) yield hd
}

// alternative parser
class AltParser[I, T](p: => Parser[I, T], 
                      q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
  def parse(in: I) = p.parse(in) ++ q.parse(in)   
}

// sequence parser
class SeqParser[I, T, S](p: => Parser[I, T], 
                         q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
  def parse(in: I) = 
    for ((hd1, tl1) <- p.parse(in); 
         (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
}

// map parser
class MapParser[I, T, S](p: => Parser[I, T], 
                         f: T => S)(using I => Seq[_]) extends Parser[I, S] {
  def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
}


// more convenient syntax for parser combinators
extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}


// -------------------------------------------------
// atomic parsers

// atomic parser for types
case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
    case _ => Set()
  }
}

// atomic parser for global ids
case object GlobalIdParser extends Parser[Tokens, String] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
    case _ => Set()
  }
}

// atomic parser for ids
case object IdParser extends Parser[Tokens, String] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
    case _ => Set()
  }
}

// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
case object DoubleParser extends Parser[Tokens, Float] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
    case _ => Set()
  }
}

// atomic parser for integers
case object IntParser extends Parser[Tokens, Int] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
    case _ => Set()
  }
}

// atomic parser for operators
case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
    case _ => Set()
  }
}

// atomic parser for character
case object CharParser extends Parser[Tokens, Char] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._1 == "ch" => {
      val stripped = tk._2.slice(1, tk._2.length-1)  // strip off single quotes
      stripped match {
        case "\\n" => Set(('\n', tkns))
        case "\\t" => Set(('\t', tkns))
        case "\\r" => Set(('\r', tkns))
        case c => Set((c(0), tkns))
      }
    }
    case _ => Set()
  }
}

// parser for list of arguments
def ListParser[I, T, S](p: => Parser[I, T], 
                        q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
  (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
  (p.map((s) => List(s)))
}

// I may want to write string interpolations for:
// keywords, semicolon, colon, comma, parentheses
case class StrParser(s: String) extends Parser[Tokens, String] {
  def parse(tokens: Tokens) = tokens match {
    case Nil => Set()
    case tk::tkns if tk._2 == s => Set((s, tkns))
    case _ => Set()
  }
}

extension (sc: StringContext) {
   def p(args: Any*) = StrParser(sc.s(args:_*))
}


// the AST datastructures for the FUN language

abstract class Exp
abstract class BExp
abstract class Decl

case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
case class Main(e: Exp) extends Decl
case class Const(name: String, v: Int) extends Decl
case class FConst(name: String, x: Float) extends Decl

case class Call(name: String, args: List[Exp]) extends Exp
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
case class Var(s: String) extends Exp
case class Num(i: Int) extends Exp  // integer numbers
case class FNum(i: Float) extends Exp  // float numbers
case class ChConst(c: Int) extends Exp  // character constants
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
case class Sequence(e1: Exp, e2: Exp) extends Exp  // expressions separated by semicolons

case class Bop(o: String, a1: Exp, a2: Exp) extends BExp


lazy val Exps: Parser[Tokens, Exp] =
  (Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
  Exp

lazy val Exp: Parser[Tokens, Exp] =
  (p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
  M

lazy val M: Parser[Tokens, Exp] = 
  (T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
  T

lazy val T: Parser[Tokens, Exp] = 
  (U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
  U

// includes negative factor
// a + - b CAN be recognised
// - - - b CAN be recognised
lazy val U: Parser[Tokens, Exp] =
  (OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
  (OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
  F

lazy val F: Parser[Tokens, Exp] = 
  (p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
  (p"skip").map(_ => Call("skip", Nil)) ||  // hardcoded
  (p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) ||  // hardcoded
  (IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
  (IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } ||  // NOTE: empty args are also accepted!
  (IdParser || GlobalIdParser).map(x => Var(x)) ||
  IntParser.map(x => Num(x)) ||
  DoubleParser.map(x => FNum(x)) ||
  CharParser.map(x => ChConst(x.toInt)) ||
  (p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }

lazy val BExp: Parser[Tokens, BExp] = 
  (Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
  (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }

lazy val TypedIdParser: Parser[Tokens, (String, String)] =
  (IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }

lazy val Defn: Parser[Tokens, Decl] =
  (p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
    case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
  } ||
  (p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
    case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
  }

lazy val Constp: Parser[Tokens, Decl] = 
  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
    case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
  } ||
  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{  // IntParser? Not Exp? For this AST, impossible to define Exp
    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
  }

// Int can be converted to Double but not viceversa
lazy val FConstp: Parser[Tokens, Decl] =
  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
    case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
  } ||
  (p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
    case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
  }

// Prog consists of global const declarations, f(x) defs, and exp in ANY order
// restricted to main body at the bottom
lazy val Prog: Parser[Tokens, List[Decl]] = 
  (Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
  (Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
  (FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
  Exp.map[List[Decl]](s => List(Main(s)))


def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)

import scala.io.Source._

@main
def parse(filename: String) = {
  val fun_code = fromFile(filename).getLines.mkString("\n")
  // print the AST list to screen
  println(parse_tks(tokenise(fun_code)))
}