|
// A parser and compiler for the While language
|
2 // |
|
3 //:load matcher.scala |
|
4 //:load parser3.scala |
|
5 |
|
// some regular expressions
//
// The constructors used here (RANGE, SEQ, STAR, ALT, ALTS, PLUS, NOT, SEQS, ALLC)
// and the String => Rexp conversion are not defined in this file; presumably
// they come from matcher.scala -- verify there.

// Characters allowed in identifiers: all 26 upper-case letters, all 26
// lower-case letters and the underscore.
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
val DIGIT = RANGE("0123456789")

// An identifier starts with a SYM character and continues with SYM or DIGIT.
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))

// A number is one or more digits.
val NUM = PLUS(DIGIT)

val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
val SEMI: Rexp = ";"
val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")

// Only blanks and newlines are listed as whitespace.
val WHITESPACE = PLUS(RANGE(" \n"))
val RPAREN: Rexp = ")"
val LPAREN: Rexp = "("
val BEGIN: Rexp = "{"
val END: Rexp = "}"

// "/*" ... "*/" where the text in between must not itself contain "*/"
// (assuming NOT is complement and ALLC matches any character -- TODO confirm).
val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
|
20 |
|
// tokens for classifying the strings that have been recognised
abstract class Token

// Tokens that carry no text.
case object T_WHITESPACE extends Token
case object T_COMMENT extends Token
case object T_SEMI extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case object T_BEGIN extends Token
case object T_END extends Token

// Tokens that keep the recognised string in `s`.
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
case class T_NUM(s: String) extends Token
case class T_KWD(s: String) extends Token
|
34 |
|
// Lexing rules: each entry pairs a regular expression with the function that
// builds the token for a match. The function argument is presumably the
// sequence of matched characters (it is only ever turned into a String with
// mkString) -- confirm against the definition of Rule.
// The entries are kept in their original order; KEYWORD comes before ID.
val lexing_rules: List[Rule[Token]] =
  List(
    (KEYWORD, chars => T_KWD(chars.mkString)),
    (ID, chars => T_ID(chars.mkString)),
    (OP, chars => T_OP(chars.mkString)),
    (NUM, chars => T_NUM(chars.mkString)),
    (SEMI, _ => T_SEMI),
    (LPAREN, _ => T_LPAREN),
    (RPAREN, _ => T_RPAREN),
    (BEGIN, _ => T_BEGIN),
    (END, _ => T_END),
    (WHITESPACE, _ => T_WHITESPACE),
    (COMMENT, _ => T_COMMENT)
  )

// the tokenizer
// The second argument lists the whitespace and comment tokens; presumably
// these are the tokens the Tokenizer filters out of its result -- TODO confirm.
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
|
50 |
|
// the abstract syntax trees
abstract class Stmt
abstract class AExp
abstract class BExp

// A block is a list of statements.
type Block = List[Stmt]

// statements
case object Skip extends Stmt
case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
case class While(b: BExp, bl: Block) extends Stmt
case class Assign(s: String, a: AExp) extends Stmt
case class Write(s: String) extends Stmt

// arithmetic expressions; `o` in Aop is the operator as a string
case class Var(s: String) extends AExp
case class Num(i: Int) extends AExp
case class Aop(o: String, a1: AExp, a2: AExp) extends AExp

// boolean expressions; `o` in Relop is the comparison operator as a string
case object True extends BExp
case object False extends BExp
case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
|
69 |
|
// atomic parsers

// Parser for one specific token: if the first token of the input equals `tok`,
// the result is that token paired with the remaining tokens; otherwise the
// result set is empty.
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
  def parse(ts: List[Token]): Set[(Token, List[Token])] = ts match {
    case head :: rest if head == tok => Set((head, rest))
    case _ => Set()
  }
}
|
// Implicit view from a token to the parser that accepts exactly that token, so
// a bare token can be written where a parser is required in the grammar rules.
// The result type is spelled out: implicit definitions without one are rejected
// by Scala 3 and are only visible after their definition point in Scala 2.
implicit def token2tparser(t: Token): TokParser = TokParser(t)
|
78 |
|
// Parser for a number token: yields the token's text converted with toInt,
// paired with the remaining tokens; any other input yields the empty set.
case object NumParser extends Parser[List[Token], Int] {
  def parse(ts: List[Token]): Set[(Int, List[Token])] = ts match {
    case T_NUM(digits) :: rest => Set((digits.toInt, rest))
    case _ => Set()
  }
}
|
85 |
|
// Parser for an identifier token: yields the identifier's name paired with the
// remaining tokens; any other input yields the empty set.
case object IdParser extends Parser[List[Token], String] {
  def parse(ts: List[Token]): Set[(String, List[Token])] = ts match {
    case T_ID(name) :: rest => Set((name, rest))
    case _ => Set()
  }
}
|
92 |
|
93 |
|
// arithmetic expressions
//
// Three layers: AExp handles "+" and "-", T handles "*", and F handles
// parenthesised expressions, variables and numbers. The combinators ~, ==>, ||,
// ~> and <~ are not defined in this file (presumably parser3.scala).
//
// NOTE(review): every rule recurses on its right-hand side, so assuming ~ is
// plain sequencing, "a - b - c" is parsed as a - (b - c). Confirm whether that
// is intended.
lazy val AExp: Parser[List[Token], AExp] =
  (T ~ T_OP("+") ~ AExp) ==> { case ((left, _), right) => Aop("+", left, right): AExp } ||
  (T ~ T_OP("-") ~ AExp) ==> { case ((left, _), right) => Aop("-", left, right): AExp } ||
  T

lazy val T: Parser[List[Token], AExp] =
  (F ~ T_OP("*") ~ T) ==> { case ((left, _), right) => Aop("*", left, right): AExp } ||
  F

lazy val F: Parser[List[Token], AExp] =
  (T_LPAREN ~> AExp <~ T_RPAREN) ||
  IdParser ==> Var ||
  NumParser ==> Num
|
104 |
|
// boolean expressions
//
// Alternatives: the keywords true and false, a parenthesised boolean
// expression, or two arithmetic expressions joined by =, !=, < or >.
// There is no ">" node in the syntax tree: "a > b" is built as Relop("<", b, a).
lazy val BExp: Parser[List[Token], BExp] =
  (T_KWD("true") ==> (_ => (True: BExp))) ||
  (T_KWD("false") ==> (_ => (False: BExp))) ||
  (T_LPAREN ~> BExp <~ T_RPAREN) ||
  (AExp ~ T_OP("=") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("=", lhs, rhs): BExp } ||
  (AExp ~ T_OP("!=") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("!=", lhs, rhs): BExp } ||
  (AExp ~ T_OP("<") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("<", lhs, rhs): BExp } ||
  (AExp ~ T_OP(">") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("<", rhs, lhs): BExp }
|
114 |
|
// Single statements: skip, assignment (id := aexp), if-then-else, while-do and
// write. Keyword and operator tokens matched along the way are discarded; only
// the sub-trees are kept.
lazy val Stmt: Parser[List[Token], Stmt] =
  (T_KWD("skip") ==> (_ => (Skip: Stmt))) ||
  (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((name, _), rhs) => Assign(name, rhs): Stmt } ||
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
    { case (((((_, cond), _), thenBl), _), elseBl) => If(cond, thenBl, elseBl): Stmt } ||
  (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==>
    { case (((_, cond), _), body) => While(cond, body): Stmt } ||
  (T_KWD("write") ~ IdParser) ==> { case (_, name) => Write(name): Stmt }
|
122 |
|
// A sequence of statements separated by semicolons, or a single statement.
lazy val Stmts: Parser[List[Token], Block] =
  (Stmt ~ T_SEMI ~ Stmts) ==> { case ((first, _), rest) => first :: rest : Block } ||
  (Stmt ==> (single => List(single) : Block))
|
126 |
|
// A block is either a statement sequence enclosed in { and }, or one bare
// statement, which is wrapped into a one-element list.
lazy val Block: Parser[List[Token], Block] =
  (T_BEGIN ~> Stmts <~ T_END) ||
  (Stmt ==> (single => List(single)))
|
130 |
|
// compiler

// Text placed in front of the generated instructions: the class header, a
// default constructor, a static helper write(I)V that prints an int with
// System.out.println, and the opening of the main method with its limits.
// Uses Jasmin-style directives (.class, .method, .limit).
val beginning = """
.class public examples/HelloWorld
.super java/lang/Object

.method public <init>()V
    aload_0
    invokenonvirtual java/lang/Object/<init>()V
    return
.end method

.method public static write(I)V
    .limit locals 5
    .limit stack 5
    iload 0
    getstatic java/lang/System/out Ljava/io/PrintStream;
    swap
    invokevirtual java/io/PrintStream/println(I)V
    return
.end method


.method public static main([Ljava/lang/String;)V
    .limit locals 200
    .limit stack 200

"""
|
158 |
|
// Text placed after the generated instructions: returns from main and closes
// the method opened at the end of `beginning`.
val ending = """

    return

.end method
"""
|
165 |
|
// for generating new labels

// Number used for the most recently generated label; starts at -1 so that the
// first label produced ends in "_0".
var counter = -1

// Advances the counter and returns `x`, an underscore and the new counter
// value, e.g. "Loop_begin_0". The counter is shared by all callers.
def Fresh(x: String): String = {
  counter = counter + 1
  s"${x}_${counter}"
}
|
173 |
|
// Maps a variable name to the local-variable index (kept as a string) used in
// the iload/istore instructions for that variable.
type Env = Map[String, String]

// Generated code: a list of instruction strings, joined with mkString at the end.
type Instrs = List[String]
|
176 |
|
// Generates stack code for an arithmetic expression: a constant becomes ldc, a
// variable becomes iload of its index in `env`, and a binary operation is the
// code of both operands followed by iadd, isub or imul.
// A variable missing from `env` makes the map lookup throw; an operator other
// than "+", "-", "*" raises a MatchError.
def compile_aexp(a: AExp, env: Env): Instrs = a match {
  case Num(i) => List(s"ldc ${i}\n")
  case Var(s) => List(s"iload ${env(s)}\n")
  case Aop(op @ ("+" | "-" | "*"), left, right) =>
    val instr = op match {
      case "+" => "iadd\n"
      case "-" => "isub\n"
      case _ => "imul\n"
    }
    compile_aexp(left, env) ++ compile_aexp(right, env) ++ List(instr)
}
|
184 |
|
// Generates code that falls through when `b` holds and jumps to label `jmp`
// when it does not. Comparisons therefore emit the opposite branch
// instruction: "=" uses if_icmpne, "!=" uses if_icmpeq, "<" uses if_icmpge.
// True emits nothing and False emits an unconditional goto.
// A Relop with any other operator raises a MatchError.
def compile_bexp(b: BExp, env: Env, jmp: String): Instrs = b match {
  case True => Nil
  case False => List(s"goto ${jmp}\n")
  case Relop(op @ ("=" | "!=" | "<"), left, right) =>
    val branch = op match {
      case "=" => "if_icmpne"
      case "!=" => "if_icmpeq"
      case _ => "if_icmpge"
    }
    compile_aexp(left, env) ++ compile_aexp(right, env) ++ List(s"${branch} ${jmp}\n")
}
|
195 |
|
196 |
|
// Generates code for one statement and returns it together with the
// environment that holds after the statement.
def compile_stmt(s: Stmt, env: Env): (Instrs, Env) = s match {
  case Skip => (Nil, env)

  case Assign(x, a) =>
    // Reuse the index already bound to x; a new variable gets the number of
    // variables seen so far as its index.
    val slot = env.getOrElse(x, env.size.toString)
    (compile_aexp(a, env) :+ s"istore ${slot}\n", env + (x -> slot))

  case If(b, bl1, bl2) =>
    // Labels are drawn before the branches are compiled so the numbering of
    // labels inside the branches is unchanged.
    val elseLabel = Fresh("If_else")
    val endLabel = Fresh("If_end")
    val (thenCode, envAfterThen) = compile_bl(bl1, env)
    // The else branch continues from the then-branch's environment.
    val (elseCode, envAfterElse) = compile_bl(bl2, envAfterThen)
    val code =
      compile_bexp(b, env, elseLabel) ++
        thenCode ++
        List(s"goto ${endLabel}\n", s"\n${elseLabel}:\n\n") ++
        elseCode ++
        List(s"\n${endLabel}:\n\n")
    (code, envAfterElse)

  case While(b, bl) =>
    val beginLabel = Fresh("Loop_begin")
    val endLabel = Fresh("Loop_end")
    val (bodyCode, envAfterBody) = compile_bl(bl, env)
    // The condition is compiled against the environment from before the body.
    val code =
      List(s"\n${beginLabel}:\n\n") ++
        compile_bexp(b, env, endLabel) ++
        bodyCode ++
        List(s"goto ${beginLabel}\n", s"\n${endLabel}:\n\n")
    (code, envAfterBody)

  case Write(x) =>
    // Load the variable and call the static write helper; both instructions
    // are kept in a single list element.
    (List(s"iload ${env(x)}\ninvokestatic examples/HelloWorld/write(I)V\n"), env)
}
|
229 |
|
// Compiles the statements of a block from left to right, passing the
// environment produced by each statement on to the next. Returns the
// concatenated code and the final environment; an empty block yields no code
// and the unchanged environment.
def compile_bl(bl: Block, env: Env): (Instrs, Env) =
  bl.foldLeft((List.empty[String], env)) { case ((codeSoFar, currentEnv), stmt) =>
    val (stmtCode, nextEnv) = compile_stmt(stmt, currentEnv)
    (codeSoFar ++ stmtCode, nextEnv)
  }
|
238 |
|
// Tokenizes `input` with Tok.fromFile (presumably a file name -- confirm in the
// Tokenizer), parses the tokens as a statement sequence, compiles it starting
// from an empty environment, and returns the code wrapped between `beginning`
// and `ending`.
def compile(input: String): String = {
  val program = Stmts.parse_single(Tok.fromFile(input))
  val (code, _) = compile_bl(program, Map.empty)
  beginning + code.mkString + ending
}
|
245 |
|
246 |
|
// Compiles `input` and writes the generated code to the file `output`.
//
// The program is compiled before the output file is opened, so a failure while
// lexing, parsing or compiling leaves any existing `output` untouched. The
// writer is closed in a finally block so the file handle is released even if
// writing fails.
def compile_to(input: String, output: String): Unit = {
  val assembly = compile(input)
  val fw = new java.io.FileWriter(output)
  try fw.write(assembly)
  finally fw.close()
}
|
252 |
|
253 |
|
//examples

// Compile the two sample programs, in this order, each to its own .j file.
List("loops.while" -> "loops.j", "fib.while" -> "fib.j").foreach {
  case (source, target) => compile_to(source, target)
}
|
258 |
|
259 |
|
260 |
|
261 |
|
262 |
|
263 |
|
264 |
|
265 |
|
266 |