| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sat, 02 Dec 2023 21:37:04 +0000 | |
| changeset 957 | 03c5a8987141 | 
| parent 954 | 4a7ed272d46e | 
| child 960 | 791f4d9f53e1 | 
| permissions | -rw-r--r-- | 
// A parser for the Fun language
//================================
//
// call with
//
//  amm fun_parser.sc fact.fun
//
//  amm fun_parser.sc defs.fun
//
// this will generate a parse-tree from a list
// of tokens
| 644 | 12 | |
| 13 | import scala.language.implicitConversions | |
| 14 | import scala.language.reflectiveCalls | |
| 15 | ||
| 789 | 16 | import $file.fun_tokens, fun_tokens._ | 
| 644 | 17 | |
| 18 | ||
| 19 | // Parser combinators | |
| 20 | // type parameter I needs to be of Seq-type | |
| 21 | // | |
// Context-bound used by the combinators below: the input type I must be
// convertible to a sequence, so a parser can test whether input remains.
type IsSeq[I] = I => Seq[_]

/*
abstract class Parser[I, T](using is: I => Seq[_]) {
  def parse(in: I): Set[(T, I)]

  def parse_all(in: I) : Set[T] =
    for ((hd, tl) <- parse(in);
         if is(tl).isEmpty) yield hd
}
*/
| 33 | ||
| 34 | ||
// Base class of all parsers. A parser transforms an input of type I into
// a set of (result, remaining-input) pairs; the empty set signals failure.
abstract class Parser[I, T](using is: I => Seq[_]) {
  def parse(ts: I): Set[(T, I)]

  // Returns the unique result of a parse that consumed the whole input.
  // If no parse consumed everything, prints the attempt that got furthest
  // (least remaining input) as a parse error and terminates the script.
  def parse_single(ts: I) : T = {
    val (complete, partial) = parse(ts).partition(p => is(p._2).isEmpty)
    if (complete.nonEmpty) complete.head._1
    else {
      println(s"Parse Error\n${partial.minBy(p => is(p._2).length)}")
      sys.exit(-1)
    }
  }
}
| 44 | ||
// Infix pairing of two parse results; lets grammar actions be written as
// `case x ~ y => ...` instead of nested tuple patterns.
case class ~[+A, +B](_1: A, _2: B)
| 47 | ||
| 954 | 48 | // parser combinators | 
| 49 | ||
// Alternative combinator: succeeds with every result of either branch.
class AltParser[I : IsSeq, T](p: => Parser[I, T],
                              q: => Parser[I, T]) extends Parser[I, T] {
  def parse(in: I) = p.parse(in) union q.parse(in)
}
| 55 | ||
// Sequence combinator: runs p first, then q on whatever input p left over.
class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
  def parse(in: I) =
    for ((r1, rest1) <- p.parse(in);
         (r2, rest2) <- q.parse(rest1))
    yield (new ~(r1, r2), rest2)
}
| 63 | ||
// Map combinator: post-processes every successful result of p with f,
// leaving the remaining input untouched.
class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
                                f: T => S) extends Parser[I, S] {
  def parse(in: I) =
    for ((r, rest) <- p.parse(in)) yield (f(r), rest)
}
| 69 | ||
| 954 | 70 | |
| 71 | ||
// Infix syntax for the combinators: p || q (alternative), p ~ q (sequence)
// and p.map(f) (semantic action).
extension [I : IsSeq, T](p: Parser[I, T]) {
  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
| 78 | ||
// Parses one or more p's separated by q's; the separator results are
// discarded and the p-results are collected into a list.
def ListParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S])(using is: I => Seq[_]): Parser[I, List[T]] = {
  // Match structurally with `x ~ _ ~ z` instead of the original
  // `(x: T) ~ (y: S) ~ (z: List[T])`: those ascriptions are type-erased,
  // unchecked tests (compiler warning) and `y` was never used.
  (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z } ||
  (p.map[List[T]]{ s => List(s) })
}
| 83 | ||
// Recognizes exactly the given token at the head of the token list.
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
  def parse(ts: List[Token]) = ts match {
    case hd :: rest if hd == tok => Set((hd, rest))
    case _ => Set()
  }
}
| 90 | ||
// Lets a bare Token stand wherever a token parser is expected.
implicit def token2tparser(t: Token) : Parser[List[Token], Token] = TokParser(t)


// Combinator syntax directly on tokens: t || q, t.map(f), t ~ q.
extension (t: Token) {
  def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
  def map[S] (f: => Token => S) = new MapParser[List[Token], Token, S](t, f)
  def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
}
| 99 | ||
// Accepts a number token, yielding its integer value.
case object NumParser extends Parser[List[Token], Int] {
  def parse(ts: List[Token]) = ts match {
    case T_NUM(n) :: rest => Set((n, rest))
    case _ => Set()
  }
}
| 106 | ||
// Accepts an identifier token, yielding its name.
case object IdParser extends Parser[List[Token], String] {
  def parse(ts: List[Token]) = ts match {
    case T_ID(name) :: rest => Set((name, rest))
    case _ => Set()
  }
}
| 113 | ||
| 114 | ||
| 115 | ||
// Abstract syntax trees for the Fun language.
abstract class Exp extends Serializable    // expressions
abstract class BExp extends Serializable   // boolean expressions
abstract class Decl extends Serializable   // top-level declarations

// function definition: name, formal parameters, body
case class Def(name: String, args: List[String], body: Exp) extends Decl
// the program's final "main" expression
case class Main(e: Exp) extends Decl

case class Call(name: String, args: List[Exp]) extends Exp   // function call
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp         // conditional
case class Write(e: Exp) extends Exp                         // print an expression
case class Var(s: String) extends Exp                        // variable reference
case class Num(i: Int) extends Exp                           // integer literal
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp      // arithmetic operation
case class Sequence(e1: Exp, e2: Exp) extends Exp            // e1 ; e2
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp     // comparison
| 132 | ||
| 133 | ||
| 134 | ||
| 135 | // Grammar Rules for the Fun language | |
| 136 | ||
// expressions: if-then-else and sequencing, then the lower levels
lazy val Exp: Parser[List[Token], Exp] =
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp)
    .map{ case _ ~ c ~ _ ~ thn ~ _ ~ els => If(c, thn, els): Exp } ||
  (M ~ T_SEMI ~ Exp).map{ case e1 ~ _ ~ e2 => Sequence(e1, e2): Exp } || M
// write-expressions
lazy val M: Parser[List[Token], Exp] =
  (T_KWD("write") ~ L).map{ case _ ~ e => Write(e): Exp } || L
// additive level: + and - (right-nested via Exp)
lazy val L: Parser[List[Token], Exp] =
  (T ~ T_OP("+") ~ Exp).map{ case lhs ~ _ ~ rhs => Aop("+", lhs, rhs): Exp } ||
  (T ~ T_OP("-") ~ Exp).map{ case lhs ~ _ ~ rhs => Aop("-", lhs, rhs): Exp } || T
// multiplicative level: *, / and %
lazy val T: Parser[List[Token], Exp] =
  (F ~ T_OP("*") ~ T).map{ case lhs ~ _ ~ rhs => Aop("*", lhs, rhs): Exp } ||
  (F ~ T_OP("/") ~ T).map{ case lhs ~ _ ~ rhs => Aop("/", lhs, rhs): Exp } ||
  (F ~ T_OP("%") ~ T).map{ case lhs ~ _ ~ rhs => Aop("%", lhs, rhs): Exp } || F
// atoms: function calls, parenthesised expressions, variables, numbers
lazy val F: Parser[List[Token], Exp] =
  (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN)
    .map{ case fn ~ _ ~ args ~ _ => Call(fn, args): Exp } ||
  (T_LPAREN ~ Exp ~ T_RPAREN).map{ case _ ~ e ~ _ => e: Exp } ||
  IdParser.map{ v => Var(v): Exp } ||
  NumParser.map{ n => Num(n): Exp }
| 644 | 156 | |
// boolean expressions: the six comparisons; greater(-or-equal) is reduced
// to less(-or-equal) with the arguments swapped
lazy val BExp: Parser[List[Token], BExp] =
  (Exp ~ T_OP("==") ~ Exp).map{ case lhs ~ _ ~ rhs => Bop("==", lhs, rhs): BExp } ||
  (Exp ~ T_OP("!=") ~ Exp).map{ case lhs ~ _ ~ rhs => Bop("!=", lhs, rhs): BExp } ||
  (Exp ~ T_OP("<") ~ Exp) .map{ case lhs ~ _ ~ rhs => Bop("<",  lhs, rhs): BExp } ||
  // a > b  is encoded as  b < a
  (Exp ~ T_OP(">") ~ Exp) .map{ case lhs ~ _ ~ rhs => Bop("<",  rhs, lhs): BExp } ||
  (Exp ~ T_OP("<=") ~ Exp).map{ case lhs ~ _ ~ rhs => Bop("<=", lhs, rhs): BExp } ||
  // NOTE(review): the matched operator token is "=>" (not ">=") and is
  // treated as greater-or-equal — presumably the tokenizer emits "=>"
  // for that operator; confirm against fun_tokens
  (Exp ~ T_OP("=>") ~ Exp).map{ case lhs ~ _ ~ rhs => Bop("<=", rhs, lhs): BExp }
| 644 | 165 | |
// function definitions: def name(arg, ..., arg) = body
lazy val Defn: Parser[List[Token], Decl] =
   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp)
     .map{ case _ ~ nm ~ _ ~ ps ~ _ ~ _ ~ body => Def(nm, ps, body): Decl }
| 644 | 168 | |
// a program: semicolon-separated definitions, ending in one main expression
lazy val Prog: Parser[List[Token], List[Decl]] =
  (Defn ~ T_SEMI ~ Prog).map{ case d ~ _ ~ ds => d :: ds : List[Decl] } ||
  (Exp.map(e => List(Main(e)) : List[Decl]))
| 644 | 172 | |
| 173 | ||
| 174 | ||
| 175 | // Reading tokens and Writing parse trees | |
| 176 | ||
| 177 | // pre-2.5.0 ammonite |
| 178 | // import ammonite.ops._ |
| 179 | |
| 180 | // post 2.5.0 ammonite |
| 181 | // import os._ |
| 789 | 182 | |
// Turns a token list into a parse tree (the program's declarations);
// prints an error and exits if the tokens do not form a full program.
def parse_tks(tks: List[Token]) : List[Decl] =
  Prog.parse_single(tks)
| 644 | 185 | |
| 822 | 186 | //@doc("Parses a file.")
 | 
// Entry point: reads the given file, tokenises it and prints the parse tree.
@main
def main(fname: String) : Unit = {
  val tks = tokenise(os.read(os.pwd / fname))
  println(parse_tks(tks))
}
| 192 | ||
| 193 |