| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Fri, 17 Nov 2023 20:06:43 +0000 | |
| changeset 954 | 4a7ed272d46e | 
| parent 870 | 1ea379515c6d | 
| child 960 | 791f4d9f53e1 | 
| permissions | -rw-r--r-- | 
| 645 | 1  | 
// A parser for the Fun language  | 
2  | 
//================================  | 
|
3  | 
//  | 
|
4  | 
// call with  | 
|
5  | 
//  | 
|
| 789 | 6  | 
// amm fun_parser.sc fact.fun  | 
| 954 | 7  | 
|
| 789 | 8  | 
// amm fun_parser.sc defs.fun  | 
9  | 
//  | 
|
10  | 
// this will generate a parse-tree from a list  | 
|
11  | 
// of tokens  | 
|
| 644 | 12  | 
|
13  | 
import scala.language.implicitConversions  | 
|
14  | 
import scala.language.reflectiveCalls  | 
|
15  | 
||
| 789 | 16  | 
import $file.fun_tokens, fun_tokens._  | 
| 644 | 17  | 
|
18  | 
||
19  | 
// Parser combinators  | 
|
20  | 
// type parameter I needs to be of Seq-type  | 
|
21  | 
//  | 
|
| 954 | 22  | 
type IsSeq[I] = I => Seq[_]  | 
23  | 
||
24  | 
/*  | 
|
25  | 
abstract class Parser[I, T](using is: I => Seq[_])  {
 | 
|
26  | 
def parse(in: I): Set[(T, I)]  | 
|
27  | 
||
28  | 
def parse_all(in: I) : Set[T] =  | 
|
29  | 
for ((hd, tl) <- parse(in);  | 
|
30  | 
if is(tl).isEmpty) yield hd  | 
|
31  | 
}  | 
|
32  | 
*/  | 
|
33  | 
||
34  | 
||
35  | 
abstract class Parser[I, T](using is: I => Seq[_]) {
 | 
|
| 644 | 36  | 
def parse(ts: I): Set[(T, I)]  | 
37  | 
||
| 655 | 38  | 
def parse_single(ts: I) : T =  | 
| 954 | 39  | 
    parse(ts).partition(p => is(p._2).isEmpty) match {
 | 
| 655 | 40  | 
case (good, _) if !good.isEmpty => good.head._1  | 
| 954 | 41  | 
      case (_, err) => { println (s"Parse Error\n${err.minBy(p => is(p._2).length)}") ; sys.exit(-1) }
 | 
| 655 | 42  | 
}  | 
| 644 | 43  | 
}  | 
44  | 
||
45  | 
// convenience for writing grammar rules  | 
|
46  | 
case class ~[+A, +B](_1: A, _2: B)  | 
|
47  | 
||
| 954 | 48  | 
// parser combinators  | 
49  | 
||
50  | 
// alternative parser  | 
|
51  | 
class AltParser[I : IsSeq, T](p: => Parser[I, T],  | 
|
52  | 
                              q: => Parser[I, T]) extends Parser[I, T] {
 | 
|
53  | 
def parse(in: I) = p.parse(in) ++ q.parse(in)  | 
|
| 644 | 54  | 
}  | 
55  | 
||
| 954 | 56  | 
// sequence parser  | 
57  | 
class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],  | 
|
58  | 
                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
 | 
|
59  | 
def parse(in: I) =  | 
|
60  | 
for ((hd1, tl1) <- p.parse(in);  | 
|
61  | 
(hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)  | 
|
| 644 | 62  | 
}  | 
63  | 
||
| 954 | 64  | 
// map parser  | 
65  | 
class MapParser[I : IsSeq, T, S](p: => Parser[I, T],  | 
|
66  | 
                                f: T => S) extends Parser[I, S] {
 | 
|
67  | 
def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)  | 
|
| 644 | 68  | 
}  | 
69  | 
||
| 954 | 70  | 
|
71  | 
||
72  | 
// more convenient syntax for parser combinators  | 
|
73  | 
extension [I : IsSeq, T](p: Parser[I, T]) {
 | 
|
74  | 
def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)  | 
|
| 644 | 75  | 
def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)  | 
| 954 | 76  | 
def map[S](f: => T => S) = new MapParser[I, T, S](p, f)  | 
| 644 | 77  | 
}  | 
78  | 
||
| 954 | 79  | 
def ListParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S])(using is: I => Seq[_]): Parser[I, List[T]] = {
 | 
80  | 
  (p ~ q ~ ListParser(p, q)).map{ case (x:T) ~ (y:S) ~ (z:List[T]) => x :: z } ||
 | 
|
81  | 
  (p.map[List[T]]{s => List(s)})
 | 
|
| 644 | 82  | 
}  | 
83  | 
||
84  | 
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
 | 
|
85  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
86  | 
case t::ts if (t == tok) => Set((t, ts))  | 
|
87  | 
case _ => Set ()  | 
|
88  | 
}  | 
|
89  | 
}  | 
|
90  | 
||
| 954 | 91  | 
implicit def token2tparser(t: Token) : Parser[List[Token], Token] = TokParser(t)  | 
92  | 
||
| 644 | 93  | 
|
| 954 | 94  | 
extension (t: Token) {
 | 
| 644 | 95  | 
def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)  | 
| 954 | 96  | 
def map[S] (f: => Token => S) = new MapParser[List[Token], Token, S](t, f)  | 
| 644 | 97  | 
def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)  | 
98  | 
}  | 
|
99  | 
||
100  | 
case object NumParser extends Parser[List[Token], Int] {
 | 
|
101  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
102  | 
case T_NUM(n)::ts => Set((n, ts))  | 
|
103  | 
case _ => Set ()  | 
|
104  | 
}  | 
|
105  | 
}  | 
|
106  | 
||
107  | 
case object IdParser extends Parser[List[Token], String] {
 | 
|
108  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
109  | 
case T_ID(s)::ts => Set((s, ts))  | 
|
110  | 
case _ => Set ()  | 
|
111  | 
}  | 
|
112  | 
}  | 
|
113  | 
||
114  | 
||
115  | 
||
116  | 
// Abstract syntax trees for the Fun language  | 
|
117  | 
abstract class Exp extends Serializable  | 
|
118  | 
abstract class BExp extends Serializable  | 
|
119  | 
abstract class Decl extends Serializable  | 
|
120  | 
||
121  | 
case class Def(name: String, args: List[String], body: Exp) extends Decl  | 
|
122  | 
case class Main(e: Exp) extends Decl  | 
|
123  | 
||
124  | 
case class Call(name: String, args: List[Exp]) extends Exp  | 
|
125  | 
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp  | 
|
126  | 
case class Write(e: Exp) extends Exp  | 
|
127  | 
case class Var(s: String) extends Exp  | 
|
128  | 
case class Num(i: Int) extends Exp  | 
|
129  | 
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp  | 
|
130  | 
case class Sequence(e1: Exp, e2: Exp) extends Exp  | 
|
131  | 
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp  | 
|
132  | 
||
133  | 
||
134  | 
||
135  | 
// Grammar Rules for the Fun language  | 
|
136  | 
||
137  | 
// arithmetic expressions  | 
|
138  | 
lazy val Exp: Parser[List[Token], Exp] =  | 
|
| 954 | 139  | 
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp).map{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
 | 
140  | 
  (M ~ T_SEMI ~ Exp).map{ case x ~ _ ~ y => Sequence(x, y): Exp } || M
 | 
|
| 644 | 141  | 
lazy val M: Parser[List[Token], Exp] =  | 
| 954 | 142  | 
  (T_KWD("write") ~ L).map{ case _ ~ y => Write(y): Exp } || L
 | 
| 644 | 143  | 
lazy val L: Parser[List[Token], Exp] =  | 
| 954 | 144  | 
  (T ~ T_OP("+") ~ Exp).map{ case x ~ _ ~ z => Aop("+", x, z): Exp } ||
 | 
145  | 
  (T ~ T_OP("-") ~ Exp).map{ case x ~ _ ~ z => Aop("-", x, z): Exp } || T  
 | 
|
| 644 | 146  | 
lazy val T: Parser[List[Token], Exp] =  | 
| 954 | 147  | 
  (F ~ T_OP("*") ~ T).map{ case x ~ _ ~ z => Aop("*", x, z): Exp } || 
 | 
148  | 
  (F ~ T_OP("/") ~ T).map{ case x ~ _ ~ z => Aop("/", x, z): Exp } || 
 | 
|
149  | 
  (F ~ T_OP("%") ~ T).map{ case x ~ _ ~ z => Aop("%", x, z): Exp } || F
 | 
|
| 644 | 150  | 
lazy val F: Parser[List[Token], Exp] =  | 
| 954 | 151  | 
(IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN).map  | 
| 644 | 152  | 
    { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
 | 
| 954 | 153  | 
  (T_LPAREN ~ Exp ~ T_RPAREN).map{ case _ ~ y ~ _ => y: Exp } || 
 | 
154  | 
  IdParser.map{ case x => Var(x): Exp } || 
 | 
|
155  | 
  NumParser.map{ case x => Num(x): Exp }
 | 
|
| 644 | 156  | 
|
157  | 
// boolean expressions  | 
|
158  | 
lazy val BExp: Parser[List[Token], BExp] =  | 
|
| 954 | 159  | 
  (Exp ~ T_OP("==") ~ Exp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
 | 
160  | 
  (Exp ~ T_OP("!=") ~ Exp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
 | 
|
161  | 
  (Exp ~ T_OP("<") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  x, z): BExp } || 
 | 
|
162  | 
  (Exp ~ T_OP(">") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  z, x): BExp } || 
 | 
|
163  | 
  (Exp ~ T_OP("<=") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", x, z): BExp } || 
 | 
|
164  | 
  (Exp ~ T_OP("=>") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", z, x): BExp }  
 | 
|
| 644 | 165  | 
|
166  | 
lazy val Defn: Parser[List[Token], Decl] =  | 
|
| 954 | 167  | 
   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp).map{ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl }
 | 
| 644 | 168  | 
|
169  | 
lazy val Prog: Parser[List[Token], List[Decl]] =  | 
|
| 954 | 170  | 
  (Defn ~ T_SEMI ~ Prog).map{ case x ~ _ ~ z => x :: z : List[Decl] } ||
 | 
171  | 
(Exp.map((s) => List(Main(s)) : List[Decl]))  | 
|
| 644 | 172  | 
|
173  | 
||
174  | 
||
175  | 
// Reading tokens and Writing parse trees  | 
|
176  | 
||
| 
869
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
177  | 
// pre-2.5.0 ammonite  | 
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
178  | 
// import ammonite.ops._  | 
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
179  | 
|
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
180  | 
// post 2.5.0 ammonite  | 
| 
870
 
1ea379515c6d
ivy import not needed
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
869 
diff
changeset
 | 
181  | 
// import os._  | 
| 789 | 182  | 
|
183  | 
def parse_tks(tks: List[Token]) : List[Decl] =  | 
|
184  | 
Prog.parse_single(tks)  | 
|
| 644 | 185  | 
|
| 822 | 186  | 
//@doc("Parses a file.")
 | 
| 789 | 187  | 
@main  | 
188  | 
def main(fname: String) : Unit = {
 | 
|
189  | 
val tks = tokenise(os.read(os.pwd / fname))  | 
|
190  | 
println(parse_tks(tks))  | 
|
| 644 | 191  | 
}  | 
192  | 
||
193  |