| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Thu, 19 Sep 2024 19:25:13 +0100 | |
| changeset 963 | 4e3f7b3574a9 | 
| parent 960 | 791f4d9f53e1 | 
| child 974 | 06148fc63273 | 
| permissions | -rw-r--r-- | 
| 645 | 1  | 
// A parser for the Fun language  | 
2  | 
//================================  | 
|
3  | 
//  | 
|
4  | 
// call with  | 
|
5  | 
//  | 
|
| 789 | 6  | 
// amm fun_parser.sc fact.fun  | 
| 954 | 7  | 
|
| 789 | 8  | 
// amm fun_parser.sc defs.fun  | 
9  | 
//  | 
|
10  | 
// this will generate a parse-tree from a list  | 
|
11  | 
// of tokens  | 
|
| 644 | 12  | 
|
| 960 | 13  | 
//> using toolkit 0.5.0  | 
14  | 
// > using file fun_tokens.scala  | 
|
15  | 
||
16  | 
||
| 644 | 17  | 
import scala.language.implicitConversions  | 
18  | 
import scala.language.reflectiveCalls  | 
|
19  | 
||
| 960 | 20  | 
|
21  | 
//import $file.fun_tokens, fun_tokens._  | 
|
| 644 | 22  | 
|
23  | 
||
24  | 
// Parser combinators  | 
|
25  | 
// type parameter I needs to be of Seq-type  | 
|
26  | 
//  | 
|
// type parameter I needs to be viewable as a Seq — used to test for
// the end of input and to measure leftover input in error reporting
type IsSeq[I] = I => Seq[?]
| 954 | 28  | 
|
29  | 
/*  | 
|
30  | 
abstract class Parser[I, T](using is: I => Seq[_])  {
 | 
|
31  | 
def parse(in: I): Set[(T, I)]  | 
|
32  | 
||
33  | 
def parse_all(in: I) : Set[T] =  | 
|
34  | 
for ((hd, tl) <- parse(in);  | 
|
35  | 
if is(tl).isEmpty) yield hd  | 
|
36  | 
}  | 
|
37  | 
*/  | 
|
38  | 
||
39  | 
||
// Base class of all parsers: consumes an input of type I (viewable
// as a Seq) and yields a set of (result, unconsumed input) pairs.
abstract class Parser[I, T](using is: I => Seq[?]) {
  def parse(ts: I): Set[(T, I)]

  // Returns the result of one complete parse (a parse that consumed
  // the entire input).  If no complete parse exists, prints the
  // failed attempt with the least leftover input and terminates.
  def parse_single(ts: I) : T = {
    val (complete, incomplete) = parse(ts).partition(p => is(p._2).isEmpty)
    if (complete.nonEmpty) complete.head._1
    else {
      println(s"Parse Error\n${incomplete.minBy(p => is(p._2).length)}")
      sys.exit(-1)
    }
  }
}
49  | 
||
50  | 
// convenience for writing grammar rules: pairs the results of two
// sequenced parsers and allows deconstruction via x ~ y patterns
case class ~[+A, +B](_1: A, _2: B)
|
52  | 
||
| 954 | 53  | 
// parser combinators  | 
54  | 
||
55  | 
// alternative parser  | 
|
// alternative combinator: collects the results of both parsers;
// arguments are call-by-name so grammars can be recursive
class AltParser[I : IsSeq, T](p: => Parser[I, T],
                              q: => Parser[I, T]) extends Parser[I, T] {
  def parse(in: I) = p.parse(in) union q.parse(in)
}
60  | 
||
| 954 | 61  | 
// sequence parser  | 
62  | 
class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],  | 
|
63  | 
                                 q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
 | 
|
64  | 
def parse(in: I) =  | 
|
65  | 
for ((hd1, tl1) <- p.parse(in);  | 
|
66  | 
(hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)  | 
|
| 644 | 67  | 
}  | 
68  | 
||
| 954 | 69  | 
// map parser  | 
70  | 
class MapParser[I : IsSeq, T, S](p: => Parser[I, T],  | 
|
71  | 
                                f: T => S) extends Parser[I, S] {
 | 
|
72  | 
def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)  | 
|
| 644 | 73  | 
}  | 
74  | 
||
| 954 | 75  | 
|
76  | 
||
77  | 
// more convenient syntax for parser combinators  | 
|
// infix syntax for the combinators above (||, ~ and map on parsers)
extension [I : IsSeq, T](p: Parser[I, T]) {
  def ||(q : => Parser[I, T]) = AltParser[I, T](p, q)
  def ~[S] (q : => Parser[I, S]) = SeqParser[I, T, S](p, q)
  def map[S](f: => T => S) = MapParser[I, T, S](p, f)
}
83  | 
||
// parses a non-empty, q-separated sequence of p's (e.g. the
// comma-separated argument lists of calls) and collects the
// p-results; the recursive call stays unevaluated because the
// combinator arguments are call-by-name
def ListParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S])(using is: I => Seq[?]): Parser[I, List[T]] = {
  (p ~ q ~ ListParser(p, q)).map{ case hd ~ _ ~ tl => hd :: tl } ||
  (p.map[List[T]](List(_)))
}
88  | 
||
// recognises exactly the given token at the head of the input
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
  def parse(ts: List[Token]) = ts match {
    case hd :: rest if hd == tok => Set((hd, rest))
    case _ => Set()
  }
}
|
95  | 
||
// allows a plain Token to be used wherever a parser is expected
// (e.g. T_SEMI inside a grammar rule)
implicit def token2tparser(t: Token) : Parser[List[Token], Token] = TokParser(t)
97  | 
||
| 644 | 98  | 
|
| 954 | 99  | 
extension (t: Token) {
 | 
| 644 | 100  | 
def || (q : => Parser[List[Token], Token]) = new AltParser[List[Token], Token](t, q)  | 
| 954 | 101  | 
def map[S] (f: => Token => S) = new MapParser[List[Token], Token, S](t, f)  | 
| 644 | 102  | 
def ~[S](q : => Parser[List[Token], S]) = new SeqParser[List[Token], Token, S](t, q)  | 
103  | 
}  | 
|
104  | 
||
// recognises a number token and returns its integer value
case object NumParser extends Parser[List[Token], Int] {
  def parse(ts: List[Token]) = ts match {
    case T_NUM(n) :: rest => Set((n, rest))
    case _ => Set()
  }
}
|
111  | 
||
// recognises an identifier token and returns its name
case object IdParser extends Parser[List[Token], String] {
  def parse(ts: List[Token]) = ts match {
    case T_ID(s) :: rest => Set((s, rest))
    case _ => Set()
  }
}
|
118  | 
||
119  | 
||
120  | 
||
// Abstract syntax trees for the Fun language
abstract class Exp extends Serializable    // expressions
abstract class BExp extends Serializable   // boolean expressions
abstract class Decl extends Serializable   // top-level declarations

case class Def(name: String, args: List[String], body: Exp) extends Decl  // function definition
case class Main(e: Exp) extends Decl                                      // final "main" expression

case class Call(name: String, args: List[Exp]) extends Exp   // function call
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp         // conditional
case class Write(e: Exp) extends Exp                         // print an expression
case class Var(s: String) extends Exp                        // variable reference
case class Num(i: Int) extends Exp                           // integer literal
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp      // arithmetic operation (+, -, *, /, %)
case class Sequence(e1: Exp, e2: Exp) extends Exp            // e1 ; e2
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp     // boolean comparison
|
137  | 
||
138  | 
||
139  | 
||
140  | 
// Grammar Rules for the Fun language  | 
|
141  | 
||
142  | 
// arithmetic expressions  | 
|
// Grammar rules for arithmetic expressions.
//
// NOTE(review): -, / and % are parsed right-associatively here
// (e.g. 3 - 2 - 1 becomes 3 - (2 - 1)) because combinator parsers
// cannot use left-recursion directly — confirm this is the intended
// semantics for the Fun language.
lazy val Exp: Parser[List[Token], Exp] =
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Exp ~ T_KWD("else") ~ Exp).map{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z): Exp } ||
  (M ~ T_SEMI ~ Exp).map{ case x ~ _ ~ y => Sequence(x, y): Exp } || M

// write-expressions
lazy val M: Parser[List[Token], Exp] =
  (T_KWD("write") ~ L).map{ case _ ~ y => Write(y): Exp } || L

// additive expressions
lazy val L: Parser[List[Token], Exp] =
  (T ~ T_OP("+") ~ Exp).map{ case x ~ _ ~ z => Aop("+", x, z): Exp } ||
  (T ~ T_OP("-") ~ Exp).map{ case x ~ _ ~ z => Aop("-", x, z): Exp } || T

// multiplicative expressions
lazy val T: Parser[List[Token], Exp] =
  (F ~ T_OP("*") ~ T).map{ case x ~ _ ~ z => Aop("*", x, z): Exp } ||
  (F ~ T_OP("/") ~ T).map{ case x ~ _ ~ z => Aop("/", x, z): Exp } ||
  (F ~ T_OP("%") ~ T).map{ case x ~ _ ~ z => Aop("%", x, z): Exp } || F

// factors: calls, parenthesised expressions, variables, numbers
lazy val F: Parser[List[Token], Exp] =
  (IdParser ~ T_LPAREN ~ ListParser(Exp, T_COMMA) ~ T_RPAREN).map
    { case x ~ _ ~ z ~ _ => Call(x, z): Exp } ||
  (T_LPAREN ~ Exp ~ T_RPAREN).map{ case _ ~ y ~ _ => y: Exp } ||
  IdParser.map{ case x => Var(x): Exp } ||
  NumParser.map{ case x => Num(x): Exp }
|
| 644 | 161  | 
|
162  | 
// boolean expressions  | 
|
163  | 
lazy val BExp: Parser[List[Token], BExp] =  | 
|
| 954 | 164  | 
  (Exp ~ T_OP("==") ~ Exp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
 | 
165  | 
  (Exp ~ T_OP("!=") ~ Exp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
 | 
|
166  | 
  (Exp ~ T_OP("<") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  x, z): BExp } || 
 | 
|
167  | 
  (Exp ~ T_OP(">") ~ Exp) .map{ case x ~ _ ~ z => Bop("<",  z, x): BExp } || 
 | 
|
168  | 
  (Exp ~ T_OP("<=") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", x, z): BExp } || 
 | 
|
169  | 
  (Exp ~ T_OP("=>") ~ Exp).map{ case x ~ _ ~ z => Bop("<=", z, x): BExp }  
 | 
|
| 644 | 170  | 
|
// function definitions:  def f(x1, ..., xn) = body
lazy val Defn: Parser[List[Token], Decl] =
   (T_KWD("def") ~ IdParser ~ T_LPAREN ~ ListParser(IdParser, T_COMMA) ~ T_RPAREN ~ T_OP("=") ~ Exp).map{ case _ ~ y ~ _ ~ w ~ _ ~ _ ~ r => Def(y, w, r): Decl }
| 644 | 173  | 
|
// a program: semicolon-separated definitions followed by one final
// "main" expression
lazy val Prog: Parser[List[Token], List[Decl]] =
  (Defn ~ T_SEMI ~ Prog).map{ case x ~ _ ~ z => x :: z : List[Decl] } ||
  (Exp.map((s) => List(Main(s)) : List[Decl]))
|
| 644 | 177  | 
|
178  | 
||
179  | 
||
180  | 
// Reading tokens and Writing parse trees  | 
|
181  | 
||
| 
869
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
182  | 
// pre-2.5.0 ammonite  | 
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
183  | 
// import ammonite.ops._  | 
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
184  | 
|
| 
 
16247acc4b0e
changed os-lib as a replacement for ammonite-ops
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
822 
diff
changeset
 | 
185  | 
// post 2.5.0 ammonite  | 
| 
870
 
1ea379515c6d
ivy import not needed
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
869 
diff
changeset
 | 
186  | 
// import os._  | 
| 789 | 187  | 
|
// parses a complete token list into a list of declarations; prints
// an error and exits the process if no complete parse exists
def parse_tks(tks: List[Token]) : List[Decl] =
  Prog.parse_single(tks)
|
| 644 | 190  | 
|
| 822 | 191  | 
//@doc("Parses a file.")
 | 
| 960 | 192  | 
//@main  | 
| 789 | 193  | 
// reads the given .fun file (relative to the working directory),
// tokenises it and prints the resulting parse tree
def main(fname: String) : Unit = {
  val contents = os.read(os.pwd / fname)
  println(parse_tks(tokenise(contents)))
}
197  | 
||
198  |