|
1 |
|
2 // regular expressions including NOT |
|
3 abstract class Rexp |
|
4 |
|
5 case object NULL extends Rexp |
|
6 case object EMPTY extends Rexp |
|
7 case object ALLC extends Rexp // recognises any character |
|
8 case class CHAR(c: Char) extends Rexp |
|
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
|
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
|
11 case class STAR(r: Rexp) extends Rexp |
|
12 case class NOT(r: Rexp) extends Rexp // negation of a regular expression |
|
13 |
|
14 |
|
15 // nullable function: tests whether the regular |
|
16 // expression can recognise the empty string |
|
17 def nullable (r: Rexp) : Boolean = r match { |
|
18 case NULL => false |
|
19 case EMPTY => true |
|
20 case ALLC => false |
|
21 case CHAR(_) => false |
|
22 case ALT(r1, r2) => nullable(r1) || nullable(r2) |
|
23 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
|
24 case STAR(_) => true |
|
25 case NOT(r) => !(nullable(r)) |
|
26 } |
|
27 |
|
28 // tests whether a regular expression |
|
29 // cannot recognise more |
|
30 def no_more (r: Rexp) : Boolean = r match { |
|
31 case NULL => true |
|
32 case EMPTY => false |
|
33 case ALLC => false |
|
34 case CHAR(_) => false |
|
35 case ALT(r1, r2) => no_more(r1) && no_more(r2) |
|
36 case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1) |
|
37 case STAR(_) => false |
|
38 case NOT(r) => !(no_more(r)) |
|
39 } |
|
40 |
|
41 |
|
42 // derivative of a regular expression w.r.t. a character |
|
43 def der (c: Char, r: Rexp) : Rexp = r match { |
|
44 case NULL => NULL |
|
45 case EMPTY => NULL |
|
46 case ALLC => EMPTY |
|
47 case CHAR(d) => if (c == d) EMPTY else NULL |
|
48 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
|
49 case SEQ(r1, r2) => |
|
50 if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |
|
51 else SEQ(der(c, r1), r2) |
|
52 case STAR(r) => SEQ(der(c, r), STAR(r)) |
|
53 case NOT(r) => NOT(der (c, r)) |
|
54 } |
|
55 |
|
56 // regular expression for specifying |
|
57 // ranges of characters |
|
58 def Range(s : List[Char]) : Rexp = s match { |
|
59 case Nil => NULL |
|
60 case c::Nil => CHAR(c) |
|
61 case c::s => ALT(CHAR(c), Range(s)) |
|
62 } |
|
63 def RANGE(s: String) = Range(s.toList) |
|
64 |
|
65 |
|
66 // one or more |
|
67 def PLUS(r: Rexp) = SEQ(r, STAR(r)) |
|
68 |
|
69 // many alternatives |
|
70 def Alts(rs: List[Rexp]) : Rexp = rs match { |
|
71 case Nil => NULL |
|
72 case r::Nil => r |
|
73 case r::rs => ALT(r, Alts(rs)) |
|
74 } |
|
75 def ALTS(rs: Rexp*) = Alts(rs.toList) |
|
76 |
|
77 // repetitions |
|
78 def Seqs(rs: List[Rexp]) : Rexp = rs match { |
|
79 case Nil => NULL |
|
80 case r::Nil => r |
|
81 case r::rs => SEQ(r, Seqs(rs)) |
|
82 } |
|
83 def SEQS(rs: Rexp*) = Seqs(rs.toList) |
|
84 |
|
85 // some convenience for typing in regular expressions |
|
86 def charlist2rexp(s : List[Char]) : Rexp = s match { |
|
87 case Nil => EMPTY |
|
88 case c::Nil => CHAR(c) |
|
89 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
|
90 } |
|
91 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |
|
92 |
|
93 |
|
94 type Rule[T] = (Rexp, List[Char] => T) |
|
95 |
|
96 case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) { |
|
97 |
|
98 def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] = |
|
99 s match { |
|
100 case Nil if (nullable(r)) => Some(Nil, action(t)) |
|
101 case Nil => None |
|
102 case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t)) |
|
103 case c::s if (no_more(der (c, r))) => None |
|
104 case c::s => munch(der (c, r), action, s, t ::: List(c)) |
|
105 } |
|
106 |
|
107 def one_token(s: List[Char]) : Either[(List[Char], T), String] = { |
|
108 val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten |
|
109 if (somes == Nil) Right(s.mkString) |
|
110 else Left(somes sortBy (_._1.length) head) |
|
111 } |
|
112 |
|
113 def tokenize(cs: List[Char]) : List[T] = cs match { |
|
114 case Nil => Nil |
|
115 case _ => one_token(cs) match { |
|
116 case Left((rest, token)) => token :: tokenize(rest) |
|
117 case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil } |
|
118 } |
|
119 } |
|
120 |
|
121 def fromString(s: String) : List[T] = |
|
122 tokenize(s.toList).filterNot(excl.contains(_)) |
|
123 |
|
124 def fromFile(name: String) : List[T] = |
|
125 fromString(io.Source.fromFile(name).mkString) |
|
126 |
|
127 } |
|
128 |
|
129 |
|
130 // parser combinators with input type I and return type T |
|
131 |
|
132 abstract class Parser[I <% Seq[_], T] { |
|
133 def parse(ts: I): Set[(T, I)] |
|
134 |
|
135 def parse_all(ts: I) : Set[T] = |
|
136 for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head |
|
137 |
|
138 def parse_single(ts: I) : T = parse_all(ts).toList match { |
|
139 case t::Nil => t |
|
140 case _ => { println ("Parse Error") ; sys.exit(-1) } |
|
141 } |
|
142 |
|
143 def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) |
|
144 def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) |
|
145 def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) |
|
146 def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) |
|
147 def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) |
|
148 } |
|
149 |
|
150 class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { |
|
151 def parse(sb: I) = |
|
152 for ((head1, tail1) <- p.parse(sb); |
|
153 (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) |
|
154 } |
|
155 |
|
156 class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { |
|
157 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
|
158 } |
|
159 |
|
160 class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { |
|
161 def parse(sb: I) = |
|
162 for ((head, tail) <- p.parse(sb)) yield (f(head), tail) |
|
163 } |
|
164 |
|
165 |
|
166 // A parser and evaluator for teh while language |
|
167 // |
|
168 //:load matcher.scala |
|
169 //:load parser3.scala |
|
170 |
|
171 // some regular expressions |
|
172 val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") |
|
173 val DIGIT = RANGE("0123456789") |
|
174 val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT))) |
|
175 val NUM = PLUS(DIGIT) |
|
176 val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write") |
|
177 val SEMI: Rexp = ";" |
|
178 val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">") |
|
179 val WHITESPACE = PLUS(RANGE(" \n")) |
|
180 val RPAREN: Rexp = ")" |
|
181 val LPAREN: Rexp = "(" |
|
182 val BEGIN: Rexp = "{" |
|
183 val END: Rexp = "}" |
|
184 val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/") |
|
185 |
|
186 // tokens for classifying the strings that have been recognised |
|
187 abstract class Token |
|
188 case object T_WHITESPACE extends Token |
|
189 case object T_COMMENT extends Token |
|
190 case object T_SEMI extends Token |
|
191 case object T_LPAREN extends Token |
|
192 case object T_RPAREN extends Token |
|
193 case object T_BEGIN extends Token |
|
194 case object T_END extends Token |
|
195 case class T_ID(s: String) extends Token |
|
196 case class T_OP(s: String) extends Token |
|
197 case class T_NUM(s: String) extends Token |
|
198 case class T_KWD(s: String) extends Token |
|
199 |
|
200 val lexing_rules: List[Rule[Token]] = |
|
201 List((KEYWORD, (s) => T_KWD(s.mkString)), |
|
202 (ID, (s) => T_ID(s.mkString)), |
|
203 (OP, (s) => T_OP(s.mkString)), |
|
204 (NUM, (s) => T_NUM(s.mkString)), |
|
205 (SEMI, (s) => T_SEMI), |
|
206 (LPAREN, (s) => T_LPAREN), |
|
207 (RPAREN, (s) => T_RPAREN), |
|
208 (BEGIN, (s) => T_BEGIN), |
|
209 (END, (s) => T_END), |
|
210 (WHITESPACE, (s) => T_WHITESPACE), |
|
211 (COMMENT, (s) => T_COMMENT)) |
|
212 |
|
213 // the tokenizer |
|
214 val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT)) |
|
215 |
|
216 // the abstract syntax trees |
|
217 abstract class Stmt |
|
218 abstract class AExp |
|
219 abstract class BExp |
|
220 type Block = List[Stmt] |
|
221 case object Skip extends Stmt |
|
222 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt |
|
223 case class While(b: BExp, bl: Block) extends Stmt |
|
224 case class Assign(s: String, a: AExp) extends Stmt |
|
225 case class Write(s: String) extends Stmt |
|
226 |
|
227 case class Var(s: String) extends AExp |
|
228 case class Num(i: Int) extends AExp |
|
229 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp |
|
230 |
|
231 case object True extends BExp |
|
232 case object False extends BExp |
|
233 case class Relop(o: String, a1: AExp, a2: AExp) extends BExp |
|
234 |
|
235 // atomic parsers |
|
236 case class TokParser(tok: Token) extends Parser[List[Token], Token] { |
|
237 def parse(ts: List[Token]) = ts match { |
|
238 case t::ts if (t == tok) => Set((t, ts)) |
|
239 case _ => Set () |
|
240 } |
|
241 } |
|
242 implicit def token2tparser(t: Token) = TokParser(t) |
|
243 |
|
244 case object NumParser extends Parser[List[Token], Int] { |
|
245 def parse(ts: List[Token]) = ts match { |
|
246 case T_NUM(s)::ts => Set((s.toInt, ts)) |
|
247 case _ => Set () |
|
248 } |
|
249 } |
|
250 |
|
251 case object IdParser extends Parser[List[Token], String] { |
|
252 def parse(ts: List[Token]) = ts match { |
|
253 case T_ID(s)::ts => Set((s, ts)) |
|
254 case _ => Set () |
|
255 } |
|
256 } |
|
257 |
|
258 |
|
259 // arithmetic expressions |
|
260 lazy val AExp: Parser[List[Token], AExp] = |
|
261 (T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } || |
|
262 (T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T |
|
263 lazy val T: Parser[List[Token], AExp] = |
|
264 (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F |
|
265 lazy val F: Parser[List[Token], AExp] = |
|
266 (T_LPAREN ~> AExp <~ T_RPAREN) || |
|
267 IdParser ==> Var || |
|
268 NumParser ==> Num |
|
269 |
|
270 // boolean expressions |
|
271 lazy val BExp: Parser[List[Token], BExp] = |
|
272 (T_KWD("true") ==> ((_) => True: BExp)) || |
|
273 (T_KWD("false") ==> ((_) => False: BExp)) || |
|
274 (T_LPAREN ~> BExp <~ T_RPAREN) || |
|
275 (AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } || |
|
276 (AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } || |
|
277 (AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } || |
|
278 (AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp } |
|
279 |
|
280 lazy val Stmt: Parser[List[Token], Stmt] = |
|
281 (T_KWD("skip") ==> ((_) => Skip: Stmt)) || |
|
282 (IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } || |
|
283 (T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==> |
|
284 { case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } || |
|
285 (T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } || |
|
286 (T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) } |
|
287 |
|
288 lazy val Stmts: Parser[List[Token], Block] = |
|
289 (Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } || |
|
290 (Stmt ==> ((s) => List(s) : Block)) |
|
291 |
|
292 lazy val Block: Parser[List[Token], Block] = |
|
293 (T_BEGIN ~> Stmts <~ T_END) || |
|
294 (Stmt ==> ((s) => List(s))) |
|
295 |
|
296 // compiler |
|
297 val beginning = """ |
|
298 .class public XXX.XXX |
|
299 .super java/lang/Object |
|
300 |
|
301 .method public <init>()V |
|
302 aload_0 |
|
303 invokenonvirtual java/lang/Object/<init>()V |
|
304 return |
|
305 .end method |
|
306 |
|
307 .method public static write(I)V |
|
308 .limit locals 5 |
|
309 .limit stack 5 |
|
310 iload 0 |
|
311 getstatic java/lang/System/out Ljava/io/PrintStream; |
|
312 swap |
|
313 invokevirtual java/io/PrintStream/println(I)V |
|
314 return |
|
315 .end method |
|
316 |
|
317 |
|
318 .method public static main([Ljava/lang/String;)V |
|
319 .limit locals 200 |
|
320 .limit stack 200 |
|
321 |
|
322 """ |
|
323 |
|
324 val ending = """ |
|
325 |
|
326 return |
|
327 |
|
328 .end method |
|
329 """ |
|
330 |
|
331 // for generating new labels |
|
332 var counter = -1 |
|
333 |
|
334 def Fresh(x: String) = { |
|
335 counter += 1 |
|
336 x ++ "_" ++ counter.toString() |
|
337 } |
|
338 |
|
339 type Env = Map[String, String] |
|
340 type Instrs = List[String] |
|
341 |
|
342 def compile_aexp(a: AExp, env : Env) : Instrs = a match { |
|
343 case Num(i) => List("ldc " + i.toString + "\n") |
|
344 case Var(s) => List("iload " + env(s) + "\n") |
|
345 case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n") |
|
346 case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n") |
|
347 case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n") |
|
348 } |
|
349 |
|
350 def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match { |
|
351 case True => Nil |
|
352 case False => List("goto " + jmp + "\n") |
|
353 case Relop("=", a1, a2) => |
|
354 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n") |
|
355 case Relop("!=", a1, a2) => |
|
356 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n") |
|
357 case Relop("<", a1, a2) => |
|
358 compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n") |
|
359 } |
|
360 |
|
361 |
|
362 def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match { |
|
363 case Skip => (Nil, env) |
|
364 case Assign(x, a) => { |
|
365 val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString |
|
366 (compile_aexp(a, env) ++ |
|
367 List("istore " + index + "\n"), env + (x -> index)) |
|
368 } |
|
369 case If(b, bl1, bl2) => { |
|
370 val if_else = Fresh("If_else") |
|
371 val if_end = Fresh("If_end") |
|
372 val (instrs1, env1) = compile_bl(bl1, env) |
|
373 val (instrs2, env2) = compile_bl(bl2, env1) |
|
374 (compile_bexp(b, env, if_else) ++ |
|
375 instrs1 ++ |
|
376 List("goto " + if_end + "\n") ++ |
|
377 List("\n" + if_else + ":\n\n") ++ |
|
378 instrs2 ++ |
|
379 List("\n" + if_end + ":\n\n"), env2) |
|
380 } |
|
381 case While(b, bl) => { |
|
382 val loop_begin = Fresh("Loop_begin") |
|
383 val loop_end = Fresh("Loop_end") |
|
384 val (instrs1, env1) = compile_bl(bl, env) |
|
385 (List("\n" + loop_begin + ":\n\n") ++ |
|
386 compile_bexp(b, env, loop_end) ++ |
|
387 instrs1 ++ |
|
388 List("goto " + loop_begin + "\n") ++ |
|
389 List("\n" + loop_end + ":\n\n"), env1) |
|
390 } |
|
391 case Write(x) => |
|
392 (List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env) |
|
393 } |
|
394 |
|
395 def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match { |
|
396 case Nil => (Nil, env) |
|
397 case s::bl => { |
|
398 val (instrs1, env1) = compile_stmt(s, env) |
|
399 val (instrs2, env2) = compile_bl(bl, env1) |
|
400 (instrs1 ++ instrs2, env2) |
|
401 } |
|
402 } |
|
403 |
|
404 def compile(input: String) : String = { |
|
405 val class_name = input.split('.')(0) |
|
406 val tks = Tok.fromFile(input) |
|
407 val ast = Stmts.parse_single(tks) |
|
408 val instructions = compile_bl(ast, Map.empty)._1 |
|
409 (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) |
|
410 } |
|
411 |
|
412 |
|
413 def compile_to(input: String, output: String) = { |
|
414 val fw = new java.io.FileWriter(output) |
|
415 fw.write(compile(input)) |
|
416 fw.close() |
|
417 } |
|
418 |
|
419 // |
|
420 val tks = Tok.fromString("x := x + 1") |
|
421 val ast = Stmt.parse_single(tks) |
|
422 println(compile_stmt(ast, Map("x" -> "n"))._1.mkString) |
|
423 |
|
424 |
|
425 |
|
426 //examples |
|
427 |
|
428 compile_to("loops.while", "loops.j") |
|
429 //compile_to("fib.while", "fib.j") |
|
430 |
|
431 |
|
432 // testing cases for time measurements |
|
433 /* |
|
434 def time_needed[T](i: Int, code: => T) = { |
|
435 val start = System.nanoTime() |
|
436 for (j <- 1 to i) code |
|
437 val end = System.nanoTime() |
|
438 (end - start)/(i * 1.0e9) |
|
439 } |
|
440 |
|
441 // for testing |
|
442 import scala.sys.process._ |
|
443 |
|
444 val test_prog = """ |
|
445 start := XXX; |
|
446 x := start; |
|
447 y := start; |
|
448 z := start; |
|
449 while 0 < x do { |
|
450 while 0 < y do { |
|
451 while 0 < z do { |
|
452 z := z - 1 |
|
453 }; |
|
454 z := start; |
|
455 y := y - 1 |
|
456 }; |
|
457 y := start; |
|
458 x := x - 1 |
|
459 }; |
|
460 write x; |
|
461 write y; |
|
462 write z |
|
463 """ |
|
464 |
|
465 |
|
466 def compile_test(n: Int) : Unit = { |
|
467 val class_name = "LOOP" |
|
468 val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString)) |
|
469 val ast = Stmts.parse_single(tks) |
|
470 val instructions = compile_bl(ast, Map.empty)._1 |
|
471 val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name) |
|
472 val fw = new java.io.FileWriter(class_name + ".j") |
|
473 fw.write(assembly) |
|
474 fw.close() |
|
475 val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!! |
|
476 println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!)) |
|
477 } |
|
478 |
|
479 List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_)) |
|
480 |
|
481 |
|
482 |
|
483 // javabyte code assmbler |
|
484 // |
|
485 // java -jar jvm/jasmin-2.4/jasmin.jar loops.j |
|
486 |
|
487 */ |
|
488 |
|
489 |
|
490 |
|
491 |
|
492 |