| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Mon, 10 Jul 2023 18:48:35 +0100 | |
| changeset 915 | 88a25a236cb2 | 
| parent 870 | 1ea379515c6d | 
| child 954 | 4a7ed272d46e | 
| permissions | -rw-r--r-- | 
| 645 | 1 | // A parser for the Fun language | 
| 2 | //================================ | |
| 3 | // | |
| 4 | // call with | |
| 5 | // | |
| 789 | 6 | // amm fun_parser.sc fact.fun | 
| 645 | 7 | // | 
| 789 | 8 | // amm fun_parser.sc defs.fun | 
| 9 | // | |
| 10 | // this will generate a parse-tree from a list | |
| 11 | // of tokens | |
| 644 | 12 | |
| 13 | import scala.language.implicitConversions | |
| 14 | import scala.language.reflectiveCalls | |
| 15 | ||
| 789 | 16 | import $file.fun_tokens, fun_tokens._ | 
| 644 | 17 | |
| 18 | ||
| 19 | // Parser combinators | |
| 20 | // type parameter I needs to be of Seq-type | |
| 21 | // | |
| 22 | abstract class Parser[I, T](implicit ev: I => Seq[_]) {
 | |
| 23 | def parse(ts: I): Set[(T, I)] | |
| 24 | ||
| 655 | 25 | def parse_single(ts: I) : T = | 
| 26 |     parse(ts).partition(_._2.isEmpty) match {
 | |
| 27 | case (good, _) if !good.isEmpty => good.head._1 | |
| 28 |       case (_, err) => { 
 | |
| 29 | 	println (s"Parse Error\n${err.minBy(_._2.length)}") ; sys.exit(-1) }
 | |
| 30 | } | |
| 644 | 31 | } | 
| 32 | ||
| 33 | // convenience for writing grammar rules | |
| 34 | case class ~[+A, +B](_1: A, _2: B) | |
| 35 | ||
| 36 | class SeqParser[I, T, S](p: => Parser[I, T], | |
| 37 |                          q: => Parser[I, S])(implicit ev: I => Seq[_]) extends Parser[I, ~[T, S]] {
 | |
| 38 | def parse(sb: I) = | |
| 39 | for ((head1, tail1) <- p.parse(sb); | |
| 40 | (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) | |
| 41 | } | |
| 42 | ||
| 43 | class AltParser[I, T](p: => Parser[I, T], | |
| 44 |                       q: => Parser[I, T])(implicit ev: I => Seq[_]) extends Parser[I, T] {
 | |
| 45 | def parse(sb: I) = p.parse(sb) ++ q.parse(sb) | |
| 46 | } | |
| 47 | ||
| 48 | class FunParser[I, T, S](p: => Parser[I, T], | |
| 49 |                          f: T => S)(implicit ev: I => Seq[_]) extends Parser[I, S] {
 | |
| 50 | def parse(sb: I) = | |
| 51 | for ((head, tail) <- p.parse(sb)) yield (f(head), tail) | |
| 52 | } | |
| 53 | ||
| 54 | // convenient combinators | |
| 55 | implicit def ParserOps[I, T](p: Parser[I, T])(implicit ev: I => Seq[_]) = new {
 | |
| 56 | def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) | |
| 57 | def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) | |
| 58 | def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) | |
| 59 | } | |
| 60 | ||
| 61 | def ListParser[I, T, S](p: => Parser[I, T], | |
| 62 |                         q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
 | |
| 63 |   (p ~ q ~ ListParser(p, q)) ==> { case x ~ _ ~ z => x :: z : List[T] } ||
 | |
| 64 | (p ==> ((s) => List(s))) | |
| 65 | } | |
| 66 | ||
| 67 | case class TokParser(tok: Token) extends Parser[List[Token], Token] {
 | |
| 68 |   def parse(ts: List[Token]) = ts match {
 | |
| 69 | case t::ts if (t == tok) => Set((t, ts)) | |
| 70 | case _ => Set () | |
| 71 | } | |
| 72 | } | |
| 73 | ||
| 74 | implicit def token2tparser(t: Token) = TokParser(t) | |
| 75 | ||
| 76 | implicit def TokOps(t: Token) = new {
 | |
| 77 | def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q) | |
| 78 | def ==>[S] (f: => Token => S) = new FunParser[List[Token], Token, S](t, f) | |
| 79 | def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q) | |
| 80 | } | |
| 81 | ||
| 82 | case object NumParser extends Parser[List[Token], Int] {
 | |
| 83 |   def parse(ts: List[Token]) = ts match {
 | |
| 84 | case T_NUM(n)::ts => Set((n, ts)) | |
| 85 | case _ => Set () | |
| 86 | } | |
| 87 | } | |
| 88 | ||
| 89 | case object IdParser extends Parser[List[Token], String] {
 | |
| 90 |   def parse(ts: List[Token]) = ts match {
 | |
| 91 | case T_ID(s)::ts => Set((s, ts)) | |
| 92 | case _ => Set () | |
| 93 | } | |
| 94 | } | |
| 95 | ||
| 96 | ||
| 97 | ||
| 98 | // Abstract syntax trees for the Fun language | |
| 99 | abstract class Exp extends Serializable | |
| 100 | abstract class BExp extends Serializable | |
| 101 | abstract class Decl extends Serializable | |
| 102 | ||
| 103 | case class Def(name: String, args: List[String], body: Exp) extends Decl | |
| 104 | case class Main(e: Exp) extends Decl | |
| 105 | ||
| 106 | case class Call(name: String, args: List[Exp]) extends Exp | |
| 107 | case class If(a: BExp, e1: Exp, e2: Exp) extends Exp | |
| 108 | case class Write(e: Exp) extends Exp | |
| 109 | case class Var(s: String) extends Exp | |
| 110 | case class Num(i: Int) extends Exp | |
| 111 | case class Aop(o: String, a1: Exp, a2: Exp) extends Exp | |
| 112 | case class Sequence(e1: Exp, e2: Exp) extends Exp | |
| 113 | case class Bop(o: String, a1: Exp, a2: Exp) extends BExp | |
| 114 | ||
| 115 | ||
| 116 | ||
| 117 | // Grammar Rules for the Fun language | |
| 118 | ||
| 119 | // arithmetic expressions | |
| 120 | lazy val Exp: Parser[List[Token], Exp] = | |
| 121 |   (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp) ==>
 | |
| 122 |     { case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
 | |
| 123 |   (M ~ T_SEMI ~ Exp) ==> { case x ~ _ ~ y => Sequence(x, y): Exp } || M
 | |
| 124 | lazy val M: Parser[List[Token], Exp] = | |
| 125 |   (T_KWD("write") ~ L) ==> { case _ ~ y => Write(y): Exp } || L
 | |
| 126 | lazy val L: Parser[List[Token], Exp] = | |
| 127 |   (T ~ T_OP("+") ~ Exp) ==> { case x ~ _ ~ z => Aop("+", x, z): Exp } ||
 | |
| 128 |   (T ~ T_OP("-") ~ Exp) ==> { case x ~ _ ~ z => Aop("-", x, z): Exp } || T  
 | |
| 129 | lazy val T: Parser[List[Token], Exp] = | |
| 130 |   (F ~ T_OP("*") ~ T) ==> { case x ~ _ ~ z => Aop("*", x, z): Exp } || 
 | |
| 131 |   (F ~ T_OP("/") ~ T) ==> { case x ~ _ ~ z => Aop("/", x, z): Exp } || 
 | |
| 132 |   (F ~ T_OP("%") ~ T) ==> { case x ~ _ ~ z => Aop("%", x, z): Exp } || F
 | |
| 133 | lazy val F: Parser[List[Token], Exp] = | |
| 134 | (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN) ==> | |
| 135 |     { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
 | |
| 136 |   (T_LPAREN ~ Exp ~ T_RPAREN) ==> { case _ ~ y ~ _ => y: Exp } || 
 | |
| 137 |   IdParser ==> { case x => Var(x): Exp } || 
 | |
| 138 |   NumParser ==> { case x => Num(x): Exp }
 | |
| 139 | ||
| 140 | // boolean expressions | |
| 141 | lazy val BExp: Parser[List[Token], BExp] = | |
| 142 |   (Exp ~ T_OP("==") ~ Exp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
 | |
| 143 |   (Exp ~ T_OP("!=") ~ Exp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
 | |
| 699 | 144 |   (Exp ~ T_OP("<") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  x, z): BExp } || 
 | 
| 145 |   (Exp ~ T_OP(">") ~ Exp)  ==> { case x ~ _ ~ z => Bop("<",  z, x): BExp } || 
 | |
| 644 | 146 |   (Exp ~ T_OP("<=") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", x, z): BExp } || 
 | 
| 147 |   (Exp ~ T_OP("=>") ~ Exp) ==> { case x ~ _ ~ z => Bop("<=", z, x): BExp }  
 | |
| 148 | ||
| 149 | lazy val Defn: Parser[List[Token], Decl] = | |
| 150 |    (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp) ==>
 | |
| 151 |      { case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl }
 | |
| 152 | ||
| 153 | lazy val Prog: Parser[List[Token], List[Decl]] = | |
| 154 |   (Defn ~ T_SEMI ~ Prog) ==> { case x ~ _ ~ z => x :: z : List[Decl] } ||
 | |
| 155 | (Exp ==> ((s) => List(Main(s)) : List[Decl])) | |
| 156 | ||
| 157 | ||
| 158 | ||
| 159 | // Reading tokens and Writing parse trees | |
| 160 | ||
| 869 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
822diff
changeset | 161 | // pre-2.5.0 ammonite | 
| 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
822diff
changeset | 162 | // import ammonite.ops._ | 
| 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
822diff
changeset | 163 | |
| 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
822diff
changeset | 164 | // post 2.5.0 ammonite | 
| 870 
1ea379515c6d
ivy import not needed
 Christian Urban <christian.urban@kcl.ac.uk> parents: 
869diff
changeset | 165 | // import os._ | 
| 789 | 166 | |
| 167 | def parse_tks(tks: List[Token]) : List[Decl] = | |
| 168 | Prog.parse_single(tks) | |
| 644 | 169 | |
| 822 | 170 | //@doc("Parses a file.")
 | 
| 789 | 171 | @main | 
| 172 | def main(fname: String) : Unit = {
 | |
| 173 | val tks = tokenise(os.read(os.pwd / fname)) | |
| 174 | println(parse_tks(tks)) | |
| 644 | 175 | } | 
| 176 | ||
| 177 |