1 // A Compiler for the WHILE language |
1 // A Small Compiler for the WHILE Language |
2 // |
2 // |
3 import matcher._ |
|
4 import parser._ |
|
5 |
|
6 // some regular expressions |
|
// First character of an identifier: any ASCII letter or underscore.
// (The original character list skipped the upper-case 'W', so identifiers
// containing that letter could not be lexed.)
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")

val DIGIT = RANGE("0123456789")

// identifiers: a SYM followed by any number of SYMs or DIGITs
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))

// numerals: one or more digits
val NUM = PLUS(DIGIT)

val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")

val SEMI: Rexp = ";"

val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")

// only blanks and newlines count as whitespace (no tabs)
val WHITESPACE = PLUS(RANGE(" \n"))

val RPAREN: Rexp = ")"

val LPAREN: Rexp = "("

val BEGIN: Rexp = "{"

val END: Rexp = "}"

// "/*", then any text that does not itself contain "*/", then "*/"
val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
|
20 |
|
21 // tokens for classifying the strings that have been recognised |
|
// Root of all tokens the lexer can produce.
abstract class Token

// tokens that keep the text they were recognised from
case class T_KWD(s: String) extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
case class T_NUM(s: String) extends Token

// punctuation and bracketing
case object T_SEMI extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case object T_BEGIN extends Token
case object T_END extends Token

// layout tokens
case object T_WHITESPACE extends Token
case object T_COMMENT extends Token
|
34 |
|
// Lexing rules: each regular expression is paired with the function that
// turns the matched characters into a token. The order of the rules is kept
// exactly as written (keywords are listed before identifiers).
val lexing_rules: List[(Rexp, List[Char] => Token)] =
  List(
    (KEYWORD, cs => T_KWD(cs.mkString)),
    (ID, cs => T_ID(cs.mkString)),
    (OP, cs => T_OP(cs.mkString)),
    (NUM, cs => T_NUM(cs.mkString)),
    // the remaining tokens carry no text, so the matched characters are ignored
    (SEMI, _ => T_SEMI),
    (LPAREN, _ => T_LPAREN),
    (RPAREN, _ => T_RPAREN),
    (BEGIN, _ => T_BEGIN),
    (END, _ => T_END),
    (WHITESPACE, _ => T_WHITESPACE),
    (COMMENT, _ => T_COMMENT)
  )

// the tokenizer
// NOTE(review): the second argument is presumably the list of tokens the
// tokenizer drops from its output; confirm against Tokenizer's definition.
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
|
50 |
3 |
51 // the abstract syntax trees |
4 // the abstract syntax trees |
// Abstract syntax trees of the WHILE language. Every declaration is given
// exactly once; all constructor names that occur in the file are kept.
abstract class Stmt
abstract class AExp
abstract class BExp
type Block = List[Stmt]

// statements
case object Skip extends Stmt
case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
case class While(b: BExp, bl: Block) extends Stmt
case class Assign(s: String, a: AExp) extends Stmt
case class Write(s: String) extends Stmt
case class Read(s: String) extends Stmt

// arithmetic expressions
case class Var(s: String) extends AExp
case class Num(i: Int) extends AExp
case class Aop(o: String, a1: AExp, a2: AExp) extends AExp

// boolean expressions
case object True extends BExp
case object False extends BExp
// comparison node built by the BExp parser
case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
// comparison node built by the hand-written fib_test program
case class Bop(o: String, a1: AExp, a2: AExp) extends BExp
69 |
27 |
70 // atomic parsers |
28 |
// Parser for one specific token: succeeds with that token and the remaining
// input when the input starts with a token equal to `tok`, and yields the
// empty set otherwise (including on empty input).
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
  def parse(ts: List[Token]) = ts match {
    case head :: rest if head == tok => Set((head, rest))
    case _ => Set()
  }
}
|
// Lets a bare token be written wherever a parser is expected, e.g.
// T_OP("+") inside a grammar rule. The result type is spelled out because
// implicit definitions should not rely on an inferred type.
implicit def token2tparser(t: Token): TokParser = TokParser(t)
|
78 |
|
// Parser for numerals: consumes a leading T_NUM token and yields its text
// converted to an Int; any other input gives the empty set.
case object NumParser extends Parser[List[Token], Int] {
  def parse(ts: List[Token]) = ts match {
    case T_NUM(digits) :: rest => Set((digits.toInt, rest))
    case _ => Set()
  }
}
|
85 |
|
// Parser for identifiers: consumes a leading T_ID token and yields its name;
// any other input gives the empty set.
case object IdParser extends Parser[List[Token], String] {
  def parse(ts: List[Token]) = ts match {
    case T_ID(name) :: rest => Set((name, rest))
    case _ => Set()
  }
}
|
92 |
|
93 |
|
94 // arithmetic expressions |
|
// Grammar for arithmetic expressions, layered so that "*" binds tighter than
// "+" and "-". Each binary rule recurses on its right-hand side only, so a
// chain of equal operators groups to the right.

// sums and differences
lazy val AExp: Parser[List[Token], AExp] =
  (T ~ T_OP("+") ~ AExp) ==> { case ((lhs, _), rhs) => Aop("+", lhs, rhs): AExp } ||
  (T ~ T_OP("-") ~ AExp) ==> { case ((lhs, _), rhs) => Aop("-", lhs, rhs): AExp } ||
  T

// products
lazy val T: Parser[List[Token], AExp] =
  (F ~ T_OP("*") ~ T) ==> { case ((lhs, _), rhs) => Aop("*", lhs, rhs): AExp } ||
  F

// factors: parenthesised expression, variable or number
lazy val F: Parser[List[Token], AExp] =
  (T_LPAREN ~> AExp <~ T_RPAREN) ||
  (IdParser ==> Var) ||
  (NumParser ==> Num)
|
104 |
|
105 // boolean expressions |
|
// Grammar for boolean expressions: the two constants, a parenthesised
// boolean expression, or a comparison of two arithmetic expressions.
lazy val BExp: Parser[List[Token], BExp] =
  (T_KWD("true") ==> (_ => True: BExp)) ||
  (T_KWD("false") ==> (_ => False: BExp)) ||
  (T_LPAREN ~> BExp <~ T_RPAREN) ||
  (AExp ~ T_OP("=") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("=", lhs, rhs): BExp } ||
  (AExp ~ T_OP("!=") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("!=", lhs, rhs): BExp } ||
  (AExp ~ T_OP("<") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("<", lhs, rhs): BExp } ||
  // "a > b" is represented as "b < a", so only "<" reaches the code generator
  (AExp ~ T_OP(">") ~ AExp) ==> { case ((lhs, _), rhs) => Relop("<", rhs, lhs): BExp }
|
114 |
|
// Grammar for a single statement: skip, assignment, if-then-else, while-do
// and write.
lazy val Stmt: Parser[List[Token], Stmt] =
  (T_KWD("skip") ==> (_ => Skip: Stmt)) ||
  (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((name, _), rhs) => Assign(name, rhs): Stmt } ||
  (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
    { case (((((_, cond), _), thenBl), _), elseBl) => If(cond, thenBl, elseBl): Stmt } ||
  (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((_, cond), _), body) => While(cond, body) } ||
  (T_KWD("write") ~ IdParser) ==> { case (_, name) => Write(name) }

// One or more statements separated by semicolons (no trailing semicolon).
lazy val Stmts: Parser[List[Token], Block] =
  (Stmt ~ T_SEMI ~ Stmts) ==> { case ((first, _), others) => first :: others : Block } ||
  (Stmt ==> (single => List(single) : Block))

// A block is either a brace-delimited statement sequence or one bare
// statement, which is wrapped into a singleton list.
lazy val Block: Parser[List[Token], Block] =
  (T_BEGIN ~> Stmts <~ T_END) ||
  (Stmt ==> (single => List(single)))
|
130 |
|
131 // compiler |
|
132 val beginning = """ |
31 val beginning = """ |
133 .class public XXX.XXX |
32 .class public XXX.XXX |
134 .super java/lang/Object |
33 .super java/lang/Object |
135 |
34 |
136 .method public <init>()V |
35 .method public <init>()V |
// Produces a new label name: bumps the shared `counter` and appends its new
// value to the given prefix, separated by an underscore.
def Fresh(x: String) = {
  counter += 1
  s"${x}_${counter}"
}
173 |
105 |
|
// environments and instructions

// maps a WHILE variable name to the index text used in its iload/istore
type Env = Map[String, String]
// generated assembly, one string per instruction or label
type Instrs = List[String]
176 |
109 |
|
// arithmetic expression compilation
//
// Emits code that leaves the value of `a` on top of the operand stack:
// constants via ldc, variables via iload from the index recorded in `env`,
// and binary operations by compiling both operands and then applying the
// matching integer instruction. A variable missing from `env` or an operator
// other than "+", "-", "*" makes the lookup/match fail with an exception.
def compile_aexp(a: AExp, env : Env) : Instrs = a match {
  case Num(i) => List("ldc " + i.toString + "\n")
  case Var(s) => List("iload " + env(s) + "\n")
  case Aop(op, a1, a2) =>
    val instr = op match {
      case "+" => "iadd\n"
      case "-" => "isub\n"
      case "*" => "imul\n"
    }
    compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List(instr)
}
184 |
118 |
|
// boolean expression compilation
//
// Emits code that falls through when `b` holds and jumps to the label `jmp`
// when it does not: `True` needs no code, `False` is an unconditional goto,
// and a comparison pushes both operands and branches on the negated test.
// Comparisons are accepted in both representations found in this file
// (Relop and Bop); operators other than "=", "!=", "<" fail the match.
def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = {
  // shared code generation for Relop and Bop nodes
  def comparison(op: String, a1: AExp, a2: AExp): Instrs = {
    val negated_branch = op match {
      case "=" => "if_icmpne "
      case "!=" => "if_icmpeq "
      case "<" => "if_icmpge "
    }
    compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List(negated_branch + jmp + "\n")
  }

  b match {
    case True => Nil
    case False => List("goto " + jmp + "\n")
    case Relop(op, a1, a2) => comparison(op, a1, a2)
    case Bop(op, a1, a2) => comparison(op, a1, a2)
  }
}
195 |
130 |
196 |
// statement compilation
//
// Returns the instructions for `s` together with the environment that holds
// after it. Assign and Read bind a variable to a local-variable index: an
// already known variable keeps its index, a new one receives the current
// size of the environment.
def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
  case Skip => (Nil, env)

  case Assign(x, a) =>
    val index = env.getOrElse(x, env.keys.size.toString)
    (compile_aexp(a, env) ++ List("istore " + index + "\n"), env + (x -> index))

  case If(b, bl1, bl2) =>
    // both labels are drawn before the branches are compiled
    val if_else = Fresh("If_else")
    val if_end = Fresh("If_end")
    // the environment is threaded through the then-branch into the else-branch
    val (instrs1, env1) = compile_block(bl1, env)
    val (instrs2, env2) = compile_block(bl2, env1)
    val code =
      compile_bexp(b, env, if_else) ++
      instrs1 ++
      List("goto " + if_end + "\n") ++
      List("\n" + if_else + ":\n\n") ++
      instrs2 ++
      List("\n" + if_end + ":\n\n")
    (code, env2)

  case While(b, bl) =>
    val loop_begin = Fresh("Loop_begin")
    val loop_end = Fresh("Loop_end")
    val (instrs1, env1) = compile_block(bl, env)
    val code =
      List("\n" + loop_begin + ":\n\n") ++
      compile_bexp(b, env, loop_end) ++
      instrs1 ++
      List("goto " + loop_begin + "\n") ++
      List("\n" + loop_end + ":\n\n")
    (code, env1)

  case Write(x) =>
    (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)

  case Read(x) =>
    val index = env.getOrElse(x, env.keys.size.toString)
    (List("invokestatic XXX/XXX/read()I\n" + "istore " + index + "\n"), env + (x -> index))
}

// compilation of a block (i.e. list of instructions)
//
// Compiles the statements from left to right, passing the environment
// produced by each statement on to the next one.
def compile_block(bl: Block, env: Env) : (Instrs, Env) = bl match {
  case Nil => (Nil, env)
  case s :: rest =>
    val (instrs1, env1) = compile_stmt(s, env)
    val (instrs2, env2) = compile_block(rest, env1)
    (instrs1 ++ instrs2, env2)
}

// second name for compile_block, kept for the callers that use it
def compile_bl(bl: Block, env: Env) : (Instrs, Env) = compile_block(bl, env)
238 |
179 |
// main compilation function for blocks
//
// Compiles `bl` starting from an empty environment, wraps the instructions
// between the `beginning` and `ending` templates and replaces every "XXX"
// placeholder with `class_name`.
def compile(bl: Block, class_name: String) : String = {
  val instructions = compile_block(bl, Map.empty)._1
  (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
}

// Compiles the WHILE program stored in the file `input`. The class name is
// the part of `input` before its first '.'.
def compile(input: String) : String = {
  val class_name = input.split('.')(0)
  val tks = Tok.fromFile(input)
  val ast = Stmts.parse_single(tks)
  compile(ast, class_name)
}
246 |
185 |
247 |
186 |
// Compiles the WHILE program in file `input` and writes the assembly to
// file `output`.
def compile_to(input: String, output: String) = {
  // compile before opening the output, so a failing compilation neither
  // leaves a writer open nor creates an empty output file
  val assembly = compile(input)
  val fw = new java.io.FileWriter(output)
  try fw.write(assembly) finally fw.close()
}

// Fibonacci numbers as a test-case
val fib_test =
  List(Read("n"),                       //  read n;
       Assign("minus1",Num(0)),         //  minus1 := 0;
       Assign("minus2",Num(1)),         //  minus2 := 1;
       Assign("temp",Num(0)),           //  temp := 0;
       While(Bop("<",Num(0),Var("n")),  //  while n > 0 do  {
          List(Assign("temp",Var("minus2")),    //  temp := minus2;
               Assign("minus2",Aop("+",Var("minus1"),Var("minus2"))),
                                        //  minus2 := minus1 + minus2;
               Assign("minus1",Var("temp")), //  minus1 := temp;
               Assign("n",Aop("-",Var("n"),Num(1))))), //  n := n - 1 };
       Write("minus1"))                 //  write minus1
|
199 |
|
200 |
|
201 |
|
// prints out the JVM-assembly program
println(compile(fib_test, "fib"))

// can be assembled with
//
//    java -jar jvm/jasmin-2.4/jasmin.jar fib.j

// compiles a single statement given as a string and prints its instructions
val tks = Tok.fromString("x := x + 1")
val ast = Stmt.parse_single(tks)
println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)

//examples

compile_to("loops.while", "loops.j")
//compile_to("fib.while", "fib.j")
|
265 |
|
266 |
|
267 // testing cases for time measurements |
|
268 |
|
// Evaluates `code` i times and returns the average wall-clock time of one
// evaluation in seconds (nanosecond difference divided by i * 1e9).
def time_needed[T](i: Int, code: => T) = {
  val start = System.nanoTime()
  (1 to i).foreach(_ => code)
  val elapsed = System.nanoTime() - start
  elapsed / (i * 1.0e9)
}
|
275 |
|
276 // for testing |
|
277 import scala.sys.process._ |
|
278 |
|
// Benchmark program: three nested count-down loops, each starting at the
// value substituted for XXX. Only blanks and newlines are used for layout.
val test_prog = """
start := XXX;
x := start;
y := start;
z := start;
while 0 < x do {
  while 0 < y do {
    while 0 < z do {
      z := z - 1
    };
    z := start;
    y := y - 1
  };
  y := start;
  x := x - 1
};
write x;
write y;
write z
"""

// Compiles test_prog with start value `n` into LOOP.j, assembles it with
// jasmin and prints `n` followed by the average running time (in seconds) of
// two runs of the resulting class.
def compile_test(n: Int) : Unit = {
  val class_name = "LOOP"
  val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
  val ast = Stmts.parse_single(tks)
  val instructions = compile_bl(ast, Map.empty)._1
  val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
  val fw = new java.io.FileWriter(class_name + ".j")
  // close the writer even when the write fails
  try fw.write(assembly) finally fw.close()
  // run the assembler; its output is not needed, `!!` raises an exception
  // when the command exits with a non-zero code
  s"java -jar jvm/jasmin-2.4/jasmin.jar ${class_name}.j".!!
  val seconds = time_needed(2, ("java " + class_name + "/" + class_name).!!)
  println(s"$n $seconds")
}

// run the benchmark for increasing start values
List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).foreach(compile_test)
|
315 |
|
316 |
|
317 |
|
// Java bytecode assembler
//
//    java -jar jvm/jasmin-2.4/jasmin.jar loops.j
//
// and started with
//
//    java fib/fib
323 |
213 |
324 |
214 |
325 |
215 |
326 |
216 |
|
217 |