903
|
1 |
// Author: Zhuo Ying Jiang Li
|
|
2 |
// Starting code by Dr Christian Urban
|
|
3 |
|
|
4 |
// parser: convert sequence of tokens to AST
|
|
5 |
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
6 |
//
|
903
|
7 |
// Use this command to print parsed AST:
|
|
8 |
// amm fun_parser.sc <name>.fun
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
9 |
//
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
10 |
|
903
|
11 |
import $file.fun_tokens, fun_tokens._
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
12 |
|
903
|
13 |
// more convenience for the map parsers later on;
|
|
14 |
// it allows writing nested patterns as
|
|
15 |
// case x ~ y ~ z => ...
|
|
16 |
case class ~[+A, +B](x: A, y: B)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
17 |
|
920
|
18 |
// parser combinators
|
|
19 |
abstract class Parser[I, T](using is: I => Seq[_]) {
|
|
20 |
def parse(in: I): Set[(T, I)]
|
903
|
21 |
|
|
22 |
def parse_all(in: I) : Set[T] =
|
920
|
23 |
for ((hd, tl) <- parse(in);
|
|
24 |
if is(tl).isEmpty) yield hd
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
25 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
26 |
|
920
|
27 |
// alternative parser
|
|
28 |
class AltParser[I, T](p: => Parser[I, T],
|
|
29 |
q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
|
|
30 |
def parse(in: I) = p.parse(in) ++ q.parse(in)
|
|
31 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
32 |
|
903
|
33 |
// sequence parser
|
920
|
34 |
class SeqParser[I, T, S](p: => Parser[I, T],
|
|
35 |
q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
|
|
36 |
def parse(in: I) =
|
|
37 |
for ((hd1, tl1) <- p.parse(in);
|
903
|
38 |
(hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
39 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
40 |
|
903
|
41 |
// map parser
|
920
|
42 |
class MapParser[I, T, S](p: => Parser[I, T],
|
|
43 |
f: T => S)(using I => Seq[_]) extends Parser[I, S] {
|
903
|
44 |
def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
45 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
46 |
|
920
|
47 |
|
903
|
48 |
// more convenient syntax for parser combinators
|
920
|
49 |
extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
|
903
|
50 |
def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
51 |
def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
|
903
|
52 |
def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
53 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
54 |
|
920
|
55 |
|
903
|
56 |
// -------------------------------------------------
|
|
57 |
// atomic parsers
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
58 |
|
903
|
59 |
// atomic parser for types
|
|
60 |
case class TypeParser(ty: Set[String]) extends Parser[Tokens, String] {
|
|
61 |
def parse(tokens: Tokens) = tokens match {
|
|
62 |
case Nil => Set()
|
|
63 |
case tk::tkns if tk._1 == "type" && ty.contains(tk._2) => Set((tk._2, tkns))
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
64 |
case _ => Set()
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
65 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
66 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
67 |
|
903
|
68 |
// atomic parser for global ids
|
|
69 |
case object GlobalIdParser extends Parser[Tokens, String] {
|
|
70 |
def parse(tokens: Tokens) = tokens match {
|
|
71 |
case Nil => Set()
|
|
72 |
case tk::tkns if tk._1 == "global" => Set((tk._2, tkns))
|
|
73 |
case _ => Set()
|
|
74 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
75 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
76 |
|
903
|
77 |
// atomic parser for ids
|
|
78 |
case object IdParser extends Parser[Tokens, String] {
|
|
79 |
def parse(tokens: Tokens) = tokens match {
|
|
80 |
case Nil => Set()
|
|
81 |
case tk::tkns if tk._1 == "id" => Set((tk._2, tkns))
|
|
82 |
case _ => Set()
|
|
83 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
84 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
85 |
|
903
|
86 |
// atomic parser for doubles (I use Float because that's what is used in the AST structures given in CW5)
|
|
87 |
case object DoubleParser extends Parser[Tokens, Float] {
|
|
88 |
def parse(tokens: Tokens) = tokens match {
|
|
89 |
case Nil => Set()
|
|
90 |
case tk::tkns if tk._1 == "double" => Set((tk._2.toFloat, tkns))
|
|
91 |
case _ => Set()
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
92 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
93 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
94 |
|
903
|
95 |
// atomic parser for integers
|
|
96 |
case object IntParser extends Parser[Tokens, Int] {
|
|
97 |
def parse(tokens: Tokens) = tokens match {
|
|
98 |
case Nil => Set()
|
|
99 |
case tk::tkns if tk._1 == "int" => Set((tk._2.toInt, tkns))
|
|
100 |
case _ => Set()
|
|
101 |
}
|
|
102 |
}
|
|
103 |
|
|
104 |
// atomic parser for operators
|
|
105 |
case class OpParser(ops: Set[String]) extends Parser[Tokens, String] {
|
|
106 |
def parse(tokens: Tokens) = tokens match {
|
|
107 |
case Nil => Set()
|
|
108 |
case tk::tkns if tk._1 == "op" && ops.contains(tk._2) => Set((tk._2, tkns))
|
|
109 |
case _ => Set()
|
|
110 |
}
|
|
111 |
}
|
|
112 |
|
|
113 |
// atomic parser for character
|
|
114 |
case object CharParser extends Parser[Tokens, Char] {
|
|
115 |
def parse(tokens: Tokens) = tokens match {
|
|
116 |
case Nil => Set()
|
|
117 |
case tk::tkns if tk._1 == "ch" => {
|
|
118 |
val stripped = tk._2.slice(1, tk._2.length-1) // strip off single quotes
|
|
119 |
stripped match {
|
|
120 |
case "\\n" => Set(('\n', tkns))
|
|
121 |
case "\\t" => Set(('\t', tkns))
|
|
122 |
case "\\r" => Set(('\r', tkns))
|
|
123 |
case c => Set((c(0), tkns))
|
|
124 |
}
|
|
125 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
126 |
case _ => Set()
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
127 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
128 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
129 |
|
903
|
130 |
// parser for list of arguments
|
|
131 |
def ListParser[I, T, S](p: => Parser[I, T],
|
|
132 |
q: => Parser[I, S])(implicit ev: I => Seq[_]): Parser[I, List[T]] = {
|
|
133 |
(p ~ q ~ ListParser(p, q)).map{ case x ~ _ ~ z => x :: z : List[T] } ||
|
|
134 |
(p.map((s) => List(s)))
|
|
135 |
}
|
|
136 |
|
|
137 |
// I may want to write string interpolations for:
|
|
138 |
// keywords, semicolon, colon, comma, parentheses
|
|
139 |
case class StrParser(s: String) extends Parser[Tokens, String] {
|
|
140 |
def parse(tokens: Tokens) = tokens match {
|
|
141 |
case Nil => Set()
|
|
142 |
case tk::tkns if tk._2 == s => Set((s, tkns))
|
|
143 |
case _ => Set()
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
144 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
145 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
146 |
|
920
|
147 |
extension (sc: StringContext) {
|
|
148 |
def p(args: Any*) = StrParser(sc.s(args:_*))
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
149 |
}
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
150 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
151 |
|
903
|
152 |
// the AST datastructures for the FUN language
|
|
153 |
|
|
154 |
abstract class Exp
|
|
155 |
abstract class BExp
|
|
156 |
abstract class Decl
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
157 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
158 |
case class Def(name: String, args: List[(String, String)], ty: String, body: Exp) extends Decl
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
159 |
case class Main(e: Exp) extends Decl
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
160 |
case class Const(name: String, v: Int) extends Decl
|
903
|
161 |
case class FConst(name: String, x: Float) extends Decl
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
162 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
163 |
case class Call(name: String, args: List[Exp]) extends Exp
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
164 |
case class If(a: BExp, e1: Exp, e2: Exp) extends Exp
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
165 |
case class Var(s: String) extends Exp
|
903
|
166 |
case class Num(i: Int) extends Exp // integer numbers
|
|
167 |
case class FNum(i: Float) extends Exp // float numbers
|
|
168 |
case class ChConst(c: Int) extends Exp // character constants
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
169 |
case class Aop(o: String, a1: Exp, a2: Exp) extends Exp
|
903
|
170 |
case class Sequence(e1: Exp, e2: Exp) extends Exp // expressions separated by semicolons
|
|
171 |
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
172 |
case class Bop(o: String, a1: Exp, a2: Exp) extends BExp
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
173 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
174 |
|
903
|
175 |
lazy val Exps: Parser[Tokens, Exp] =
|
|
176 |
(Exp ~ p";" ~ Exps).map[Exp]{ case x ~ _ ~ z => Sequence(x, z) } ||
|
|
177 |
Exp
|
|
178 |
|
|
179 |
lazy val Exp: Parser[Tokens, Exp] =
|
|
180 |
(p"if" ~ BExp ~ p"then" ~ Exp ~ p"else" ~ Exp).map[Exp]{ case _ ~ x ~ _ ~ y ~ _ ~ z => If(x, y, z) } ||
|
|
181 |
M
|
|
182 |
|
|
183 |
lazy val M: Parser[Tokens, Exp] =
|
|
184 |
(T ~ OpParser(Set("+", "-")) ~ M).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
|
|
185 |
T
|
|
186 |
|
|
187 |
lazy val T: Parser[Tokens, Exp] =
|
|
188 |
(U ~ OpParser(Set("*", "/", "%")) ~ T).map[Exp]{ case x ~ y ~ z => Aop(y, x, z) } ||
|
|
189 |
U
|
|
190 |
|
|
191 |
// includes negative factor
|
|
192 |
// a + - b CAN be recognised
|
|
193 |
// - - - b CAN be recognised
|
|
194 |
lazy val U: Parser[Tokens, Exp] =
|
|
195 |
(OpParser(Set("-")) ~ U).map[Exp]{ case _ ~ y => Aop("*", Num(-1), y) } ||
|
|
196 |
(OpParser(Set("+")) ~ U).map[Exp]{ case _ ~ y => y } ||
|
|
197 |
F
|
|
198 |
|
|
199 |
lazy val F: Parser[Tokens, Exp] =
|
|
200 |
(p"(" ~ Exp ~ p")").map[Exp]{ case _ ~ y ~ _ => y } ||
|
|
201 |
(p"skip").map(_ => Call("skip", Nil)) || // hardcoded
|
|
202 |
(p"skip" ~ p"(" ~ p")").map(_ => Call("skip", Nil)) || // hardcoded
|
|
203 |
(IdParser ~ p"(" ~ ListParser(Exp, p",") ~ p")").map[Exp]{ case id ~ _ ~ args ~ _ => Call(id, args) } ||
|
|
204 |
(IdParser ~ p"(" ~ p")").map[Exp]{ case id ~ _ ~ _ => Call(id, Nil) } || // NOTE: empty args are also accepted!
|
|
205 |
(IdParser || GlobalIdParser).map(x => Var(x)) ||
|
|
206 |
IntParser.map(x => Num(x)) ||
|
|
207 |
DoubleParser.map(x => FNum(x)) ||
|
|
208 |
CharParser.map(x => ChConst(x.toInt)) ||
|
|
209 |
(p"{" ~ Exps ~ p"}").map[Exp]{ case _ ~ x ~ _ => x }
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
210 |
|
903
|
211 |
lazy val BExp: Parser[Tokens, BExp] =
|
|
212 |
(Exp ~ OpParser(Set("==", "!=", "<", ">", "<=", ">=")) ~ Exp).map[BExp]{ case x ~ y ~ z => Bop(y, x, z) } ||
|
|
213 |
(p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ y ~ _ => y }
|
|
214 |
|
|
215 |
lazy val TypedIdParser: Parser[Tokens, (String, String)] =
|
|
216 |
(IdParser ~ p":" ~ TypeParser(Set("Int", "Double"))).map{ case n ~ _ ~ t => (n, t) }
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
217 |
|
903
|
218 |
lazy val Defn: Parser[Tokens, Decl] =
|
|
219 |
(p"def" ~ IdParser ~ p"(" ~ ListParser(TypedIdParser, p",") ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
|
|
220 |
case _ ~ y ~ _ ~ w ~ _ ~ _ ~ t ~ _ ~ b => Def(y, w, t, b)
|
|
221 |
} ||
|
|
222 |
(p"def" ~ IdParser ~ p"(" ~ p")" ~ p":" ~ TypeParser(Set("Int", "Double", "Void")) ~ OpParser(Set("=")) ~ Exp).map[Decl]{
|
|
223 |
case _ ~ y ~ _ ~ _ ~ _ ~ t ~ _ ~ b => Def(y, Nil, t, b)
|
|
224 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
225 |
|
903
|
226 |
lazy val Constp: Parser[Tokens, Decl] =
|
|
227 |
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
|
|
228 |
case _ ~ id ~ _ ~ _ ~ _ ~ n => Const(id, n)
|
|
229 |
} ||
|
|
230 |
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Int")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ IntParser).map[Decl]{ // IntParser? Not Exp? For this AST, impossible to define Exp
|
|
231 |
case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => Const(id, -n)
|
|
232 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
233 |
|
903
|
234 |
// Int can be converted to Double but not viceversa
|
|
235 |
lazy val FConstp: Parser[Tokens, Decl] =
|
|
236 |
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
|
|
237 |
case _ ~ id ~ _ ~ _ ~ _ ~ n => FConst(id, n)
|
|
238 |
} ||
|
|
239 |
(p"val" ~ GlobalIdParser ~ p":" ~ TypeParser(Set("Double")) ~ OpParser(Set("=")) ~ OpParser(Set("-")) ~ (DoubleParser || IntParser.map[Float](i => i.toFloat))).map[Decl]{
|
|
240 |
case _ ~ id ~ _ ~ _ ~ _ ~ _ ~ n => FConst(id, -n)
|
|
241 |
}
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
242 |
|
903
|
243 |
// Prog consists of global const declarations, f(x) defs, and exp in ANY order
|
|
244 |
// restricted to main body at the bottom
|
|
245 |
lazy val Prog: Parser[Tokens, List[Decl]] =
|
|
246 |
(Defn ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
|
|
247 |
(Constp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
|
|
248 |
(FConstp ~ p";" ~ Prog).map[List[Decl]]{ case x ~ _ ~ z => x :: z } ||
|
|
249 |
Exp.map[List[Decl]](s => List(Main(s)))
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
250 |
|
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
251 |
|
903
|
252 |
def parse_tks(tokens: Tokens) = Prog.parse_all(tokens)
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
253 |
|
903
|
254 |
import scala.io.Source._
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
255 |
|
903
|
256 |
@main
|
|
257 |
def parse(filename: String) = {
|
|
258 |
val fun_code = fromFile(filename).getLines.mkString("\n")
|
|
259 |
// print the AST list to screen
|
|
260 |
println(parse_tks(tokenise(fun_code)))
|
864
Christian Urban <christian.urban@kcl.ac.uk>
parents:
diff
changeset
|
261 |
}
|