| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Thu, 19 Sep 2024 19:21:52 +0100 | |
| changeset 962 | 2ecd511e62fa | 
| parent 960 | 791f4d9f53e1 | 
| child 974 | 06148fc63273 | 
| permissions | -rw-r--r-- | 
// A parser for the Fun language
//================================
//
// call with
//
//    amm fun_parser.sc fact.fun
//
//    amm fun_parser.sc defs.fun
//
// this will generate a parse-tree from a list
// of tokens
| 644 | 12 | |
| 960 | 13 | //> using toolkit 0.5.0 | 
| 14 | // > using file fun_tokens.scala | |
| 15 | ||
| 16 | ||
| 644 | 17 | import scala.language.implicitConversions | 
| 18 | import scala.language.reflectiveCalls | |
| 19 | ||
| 960 | 20 | |
| 21 | //import $file.fun_tokens, fun_tokens._ | |
| 644 | 22 | |
| 23 | ||
| 24 | // Parser combinators | |
| 25 | // type parameter I needs to be of Seq-type | |
| 26 | // | |
// Type constraint: the input type I must be viewable as a Seq, so we
// can test whether a parser has consumed all of its input.
type IsSeq[I] = I => Seq[?]

// Base class of all parsers: a parser over inputs of type I producing
// results of type T.  `parse` returns every possible pair of
// (result, unconsumed input).
abstract class Parser[I, T](using is: I => Seq[?]) {
  def parse(ts: I): Set[(T, I)]

  // Returns the result of the (first) parse that consumes the whole
  // input; otherwise prints the failed candidate that got furthest
  // (fewest leftover tokens) and exits the program.
  def parse_single(ts: I) : T =
    parse(ts).partition(p => is(p._2).isEmpty) match {
      case (good, _) if !good.isEmpty => good.head._1
      case (_, err) => {
        // Guard the empty case: minBy on an empty set would throw an
        // UnsupportedOperationException instead of reporting the error.
        if (err.isEmpty) println("Parse Error\n(no parse at all)")
        else println(s"Parse Error\n${err.minBy(p => is(p._2).length)}")
        sys.exit(-1)
      }
    }
}
| 49 | ||
// convenience for writing grammar rules: the result type of a sequence
// parser, deconstructed in patterns as  x ~ y
case class ~[+A, +B](_1: A, _2: B)
| 52 | ||
| 954 | 53 | // parser combinators | 
| 54 | ||
// Alternative parser: succeeds with every result of either branch
// (both branches are tried on the same input).
class AltParser[I : IsSeq, T](p: => Parser[I, T],
                              q: => Parser[I, T]) extends Parser[I, T] {
  def parse(in: I) = {
    val fromP = p.parse(in)
    val fromQ = q.parse(in)
    fromP ++ fromQ
  }
}
| 60 | ||
// Sequence parser: runs p, then q on the leftover input; the two
// results are paired up with the ~ constructor.
class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
  def parse(in: I) =
    p.parse(in).flatMap { case (res1, rest1) =>
      q.parse(rest1).map { case (res2, rest2) => (new ~(res1, res2), rest2) }
    }
}
| 68 | ||
// Map parser (semantic action): applies f to every result produced
// by p, leaving the unconsumed input untouched.
class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
                                f: T => S) extends Parser[I, S] {
  def parse(in: I) =
    p.parse(in).map { case (res, rest) => (f(res), rest) }
}
| 74 | ||
| 954 | 75 | |
| 76 | ||
// more convenient infix syntax for parser combinators:
//   p || q   alternative
//   p ~ q    sequence
//   p.map(f) semantic action
extension [I : IsSeq, T](p: Parser[I, T]) {
  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
}
| 83 | ||
// Parses a non-empty, q-separated list of p's (e.g. comma-separated
// call arguments); the separator results are discarded.
def ListParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S])(using is: I => Seq[?]): Parser[I, List[T]] = {
  // NOTE: the original pattern used erased type patterns
  // (x:T) ~ (y:S) ~ (z:List[T]), which are unchecked at runtime and
  // bound an unused y; plain binders are equivalent and warning-free.
  (p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z } ||
  (p.map[List[T]]{s => List(s)})
}
| 88 | ||
// Recognises exactly the given token at the head of the input.
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
  def parse(ts: List[Token]) =
    if (ts.nonEmpty && ts.head == tok) Set((ts.head, ts.tail))
    else Set()
}
| 95 | ||
// lets a bare Token be used wherever a token parser is expected
implicit def token2tparser(t: Token) : Parser[List[Token], Token] = TokParser(t)
| 97 | ||
| 644 | 98 | |
// infix combinators directly on tokens, so grammar rules can write
// e.g.  T_KWD("if") ~ BExp  without an explicit TokParser(...)
extension (t: Token) {
  def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)
  def map[S] (f: => Token => S) = new MapParser[List[Token], Token, S](t, f)
  def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)
}
| 104 | ||
// Recognises a number token and yields its integer value.
case object NumParser extends Parser[List[Token], Int] {
  def parse(ts: List[Token]) = ts match {
    case T_NUM(n) :: rest => Set((n, rest))
    case _                => Set()
  }
}
| 111 | ||
// Recognises an identifier token and yields its name.
case object IdParser extends Parser[List[Token], String] {
  def parse(ts: List[Token]) = ts match {
    case T_ID(name) :: rest => Set((name, rest))
    case _                  => Set()
  }
}
| 118 | ||
| 119 | ||
| 120 | ||
// Abstract syntax trees for the Fun language.
// The three hierarchies are sealed so that pattern matches over them
// are checked for exhaustiveness by the compiler; all variants live
// in this file.
sealed abstract class Exp extends Serializable
sealed abstract class BExp extends Serializable
sealed abstract class Decl extends Serializable

// declarations: function definitions and the main expression
final case class Def(name: String, args: List[String], body: Exp) extends Decl
final case class Main(e: Exp) extends Decl

// expressions
final case class Call(name: String, args: List[Exp]) extends Exp  // function call
final case class If(a: BExp, e1: Exp, e2: Exp) extends Exp        // conditional
final case class Write(e: Exp) extends Exp                        // 'write' expression (output; semantics in the backend)
final case class Var(s: String) extends Exp                       // variable reference
final case class Num(i: Int) extends Exp                          // integer literal
final case class Aop(o: String, a1: Exp, a2: Exp) extends Exp     // arithmetic operation
final case class Sequence(e1: Exp, e2: Exp) extends Exp           // e1 ; e2
final case class Bop(o: String, a1: Exp, a2: Exp) extends BExp    // boolean comparison
| 137 | ||
| 138 | ||
| 139 | ||
// Grammar Rules for the Fun language

// top level of expressions: if-then-else, sequencing with ';',
// otherwise fall through to M
lazy val Exp: Parser[List[Token], Exp] =
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp).map{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
  (M ~ T_SEMI ~ Exp).map{ case x ~ _ ~ y => Sequence(x, y): Exp } || M
 | |
// 'write' expressions, otherwise fall through to L
lazy val M: Parser[List[Token], Exp] =
  (T_KWD("write") ~ L).map{ case _ ~ y => Write(y): Exp } || L
// additive level: + and -
// NOTE(review): the right operand is parsed at Exp level, so the tree
// is right-nested — a-b-c parses as a-(b-c); confirm this is intended
lazy val L: Parser[List[Token], Exp] =
  (T ~ T_OP("+") ~ Exp).map{ case x ~ _ ~ z => Aop("+", x, z): Exp } ||
  (T ~ T_OP("-") ~ Exp).map{ case x ~ _ ~ z => Aop("-", x, z): Exp } || T
// multiplicative level: *, / and %
// NOTE(review): right-recursive, so a/b/c parses as a/(b/c) — confirm
// this is intended for the non-associative operators / and %
lazy val T: Parser[List[Token], Exp] =
  (F ~ T_OP("*") ~ T).map{ case x ~ _ ~ z => Aop("*", x, z): Exp } ||
  (F ~ T_OP("/") ~ T).map{ case x ~ _ ~ z => Aop("/", x, z): Exp } ||
  (F ~ T_OP("%") ~ T).map{ case x ~ _ ~ z => Aop("%", x, z): Exp } || F
// atomic factors: function call, parenthesised expression, variable,
// number literal
// NOTE(review): ListParser requires at least one element, so calls
// with zero arguments f() do not parse — confirm the language allows
// only calls with arguments
lazy val F: Parser[List[Token], Exp] =
  (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN).map
    { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
  (T_LPAREN ~ Exp ~ T_RPAREN).map{ case _ ~ y ~ _ => y: Exp } ||
  IdParser.map{ case x => Var(x): Exp } ||
  NumParser.map{ case x => Num(x): Exp }
| 644 | 161 | |
// boolean expressions: comparisons are normalised so that only
// "==", "!=", "<" and "<=" appear in the AST — '>' and '>=' are
// expressed by swapping the operands
lazy val BExp: Parser[List[Token], BExp] =
  (Exp ~ T_OP("==") ~ Exp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } ||
  (Exp ~ T_OP("!=") ~ Exp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } ||
  (Exp ~ T_OP("<") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  x, z): BExp } ||
  (Exp ~ T_OP(">") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  z, x): BExp } ||
  (Exp ~ T_OP("<=") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", x, z): BExp } ||
  (Exp ~ T_OP("=>") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", z, x): BExp }
  // NOTE(review): the last rule matches the token "=>" (not ">=") —
  // confirm the lexer in fun_tokens really emits "=>" for greater-or-equal
| 644 | 170 | |
// function definitions:  def f(x1, ..., xn) = body
lazy val Defn: Parser[List[Token], Decl] =
   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp).map{ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl }
| 644 | 173 | |
// a whole program: semicolon-separated function definitions, ending
// in the main expression
lazy val Prog: Parser[List[Token], List[Decl]] =
  (Defn ~ T_SEMI ~ Prog).map{ case x ~ _ ~ z => x :: z : List[Decl] } ||
  (Exp.map((s) => List(Main(s)) : List[Decl]))
| 644 | 177 | |
| 178 | ||
| 179 | ||
// Reading tokens and Writing parse trees

// pre-2.5.0 ammonite
// import ammonite.ops._

// post 2.5.0 ammonite
// import os._
| 789 | 187 | |
// Turns a token list into the parse tree of a whole program; prints
// an error and exits if no parse consumes all tokens (see parse_single).
def parse_tks(tks: List[Token]) : List[Decl] =
  Prog.parse_single(tks)
| 644 | 190 | |
//@doc("Parses a file.")
//@main
// Entry point: reads the named file from the current working
// directory, tokenises it (tokenise comes from fun_tokens) and prints
// the resulting parse tree.
def main(fname: String) : Unit = {
  val tks = tokenise(os.read(os.pwd / fname))
  println(parse_tks(tks))
}
| 197 | ||
| 198 |