92
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
1 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
2 |
// regular expressions including NOT
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
3 |
abstract class Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
4 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
5 |
case object NULL extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
6 |
case object EMPTY extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
7 |
case object ALLC extends Rexp // recognises any character
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
8 |
case class CHAR(c: Char) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
9 |
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
10 |
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
11 |
case class STAR(r: Rexp) extends Rexp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
12 |
case class NOT(r: Rexp) extends Rexp // negation of a regular expression
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
13 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
14 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
15 |
// nullable function: tests whether the regular
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
16 |
// expression can recognise the empty string
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
17 |
def nullable (r: Rexp) : Boolean = r match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
18 |
case NULL => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
19 |
case EMPTY => true
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
20 |
case ALLC => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
21 |
case CHAR(_) => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
22 |
case ALT(r1, r2) => nullable(r1) || nullable(r2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
23 |
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
24 |
case STAR(_) => true
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
25 |
case NOT(r) => !(nullable(r))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
26 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
27 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
28 |
// tests whether a regular expression
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
29 |
// cannot recognise more
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
30 |
def no_more (r: Rexp) : Boolean = r match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
31 |
case NULL => true
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
32 |
case EMPTY => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
33 |
case ALLC => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
34 |
case CHAR(_) => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
35 |
case ALT(r1, r2) => no_more(r1) && no_more(r2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
36 |
case SEQ(r1, r2) => if (nullable(r1)) (no_more(r1) && no_more(r2)) else no_more(r1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
37 |
case STAR(_) => false
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
38 |
case NOT(r) => !(no_more(r))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
39 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
40 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
41 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
42 |
// derivative of a regular expression w.r.t. a character
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
43 |
def der (c: Char, r: Rexp) : Rexp = r match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
44 |
case NULL => NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
45 |
case EMPTY => NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
46 |
case ALLC => EMPTY
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
47 |
case CHAR(d) => if (c == d) EMPTY else NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
48 |
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
49 |
case SEQ(r1, r2) =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
50 |
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
51 |
else SEQ(der(c, r1), r2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
52 |
case STAR(r) => SEQ(der(c, r), STAR(r))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
53 |
case NOT(r) => NOT(der (c, r))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
54 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
55 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
56 |
// regular expression for specifying
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
57 |
// ranges of characters
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
58 |
def Range(s : List[Char]) : Rexp = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
59 |
case Nil => NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
60 |
case c::Nil => CHAR(c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
61 |
case c::s => ALT(CHAR(c), Range(s))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
62 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
63 |
def RANGE(s: String) = Range(s.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
64 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
65 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
66 |
// one or more
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
67 |
def PLUS(r: Rexp) = SEQ(r, STAR(r))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
68 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
69 |
// many alternatives
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
70 |
def Alts(rs: List[Rexp]) : Rexp = rs match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
71 |
case Nil => NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
72 |
case r::Nil => r
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
73 |
case r::rs => ALT(r, Alts(rs))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
74 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
75 |
def ALTS(rs: Rexp*) = Alts(rs.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
76 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
77 |
// repetitions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
78 |
def Seqs(rs: List[Rexp]) : Rexp = rs match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
79 |
case Nil => NULL
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
80 |
case r::Nil => r
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
81 |
case r::rs => SEQ(r, Seqs(rs))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
82 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
83 |
def SEQS(rs: Rexp*) = Seqs(rs.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
84 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
85 |
// some convenience for typing in regular expressions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
86 |
def charlist2rexp(s : List[Char]) : Rexp = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
87 |
case Nil => EMPTY
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
88 |
case c::Nil => CHAR(c)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
89 |
case c::s => SEQ(CHAR(c), charlist2rexp(s))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
90 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
91 |
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
92 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
93 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
94 |
type Rule[T] = (Rexp, List[Char] => T)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
95 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
96 |
case class Tokenizer[T](rules: List[Rule[T]], excl: List[T] = Nil) {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
97 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
98 |
def munch(r: Rexp, action: List[Char] => T, s: List[Char], t: List[Char]) : Option[(List[Char], T)] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
99 |
s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
100 |
case Nil if (nullable(r)) => Some(Nil, action(t))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
101 |
case Nil => None
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
102 |
case c::s if (no_more(der (c, r)) && nullable(r)) => Some(c::s, action(t))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
103 |
case c::s if (no_more(der (c, r))) => None
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
104 |
case c::s => munch(der (c, r), action, s, t ::: List(c))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
105 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
106 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
107 |
def one_token(s: List[Char]) : Either[(List[Char], T), String] = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
108 |
val somes = rules.map { (r) => munch(r._1, r._2, s, Nil) }.flatten
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
109 |
if (somes == Nil) Right(s.mkString)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
110 |
else Left(somes sortBy (_._1.length) head)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
111 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
112 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
113 |
def tokenize(cs: List[Char]) : List[T] = cs match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
114 |
case Nil => Nil
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
115 |
case _ => one_token(cs) match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
116 |
case Left((rest, token)) => token :: tokenize(rest)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
117 |
case Right(s) => { println("Cannot tokenize: \"" + s + "\""); Nil }
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
118 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
119 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
120 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
121 |
def fromString(s: String) : List[T] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
122 |
tokenize(s.toList).filterNot(excl.contains(_))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
123 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
124 |
def fromFile(name: String) : List[T] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
125 |
fromString(io.Source.fromFile(name).mkString)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
126 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
127 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
128 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
129 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
130 |
// parser combinators with input type I and return type T
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
131 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
132 |
abstract class Parser[I <% Seq[_], T] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
133 |
def parse(ts: I): Set[(T, I)]
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
134 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
135 |
def parse_all(ts: I) : Set[T] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
136 |
for ((head, tail) <- parse(ts); if (tail.isEmpty)) yield head
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
137 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
138 |
def parse_single(ts: I) : T = parse_all(ts).toList match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
139 |
case t::Nil => t
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
140 |
case _ => { println ("Parse Error") ; sys.exit(-1) }
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
141 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
142 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
143 |
def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
144 |
def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
145 |
def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
146 |
def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
147 |
def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
148 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
149 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
150 |
class SeqParser[I <% Seq[_], T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
151 |
def parse(sb: I) =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
152 |
for ((head1, tail1) <- p.parse(sb);
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
153 |
(head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
154 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
155 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
156 |
class AltParser[I <% Seq[_], T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
157 |
def parse(sb: I) = p.parse(sb) ++ q.parse(sb)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
158 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
159 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
160 |
class FunParser[I <% Seq[_], T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
161 |
def parse(sb: I) =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
162 |
for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
163 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
164 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
165 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
166 |
// A parser and evaluator for teh while language
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
167 |
//
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
168 |
//:load matcher.scala
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
169 |
//:load parser3.scala
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
170 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
171 |
// some regular expressions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
172 |
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
173 |
val DIGIT = RANGE("0123456789")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
174 |
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
175 |
val NUM = PLUS(DIGIT)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
176 |
val KEYWORD = ALTS("skip", "while", "do", "if", "then", "else", "true", "false", "write")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
177 |
val SEMI: Rexp = ";"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
178 |
val OP: Rexp = ALTS(":=", "=", "-", "+", "*", "!=", "<", ">")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
179 |
val WHITESPACE = PLUS(RANGE(" \n"))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
180 |
val RPAREN: Rexp = ")"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
181 |
val LPAREN: Rexp = "("
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
182 |
val BEGIN: Rexp = "{"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
183 |
val END: Rexp = "}"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
184 |
val COMMENT = SEQS("/*", NOT(SEQS(STAR(ALLC), "*/", STAR(ALLC))), "*/")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
185 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
186 |
// tokens for classifying the strings that have been recognised
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
187 |
abstract class Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
188 |
case object T_WHITESPACE extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
189 |
case object T_COMMENT extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
190 |
case object T_SEMI extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
191 |
case object T_LPAREN extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
192 |
case object T_RPAREN extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
193 |
case object T_BEGIN extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
194 |
case object T_END extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
195 |
case class T_ID(s: String) extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
196 |
case class T_OP(s: String) extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
197 |
case class T_NUM(s: String) extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
198 |
case class T_KWD(s: String) extends Token
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
199 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
200 |
val lexing_rules: List[Rule[Token]] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
201 |
List((KEYWORD, (s) => T_KWD(s.mkString)),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
202 |
(ID, (s) => T_ID(s.mkString)),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
203 |
(OP, (s) => T_OP(s.mkString)),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
204 |
(NUM, (s) => T_NUM(s.mkString)),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
205 |
(SEMI, (s) => T_SEMI),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
206 |
(LPAREN, (s) => T_LPAREN),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
207 |
(RPAREN, (s) => T_RPAREN),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
208 |
(BEGIN, (s) => T_BEGIN),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
209 |
(END, (s) => T_END),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
210 |
(WHITESPACE, (s) => T_WHITESPACE),
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
211 |
(COMMENT, (s) => T_COMMENT))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
212 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
213 |
// the tokenizer
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
214 |
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE, T_COMMENT))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
215 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
216 |
// the abstract syntax trees
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
217 |
abstract class Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
218 |
abstract class AExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
219 |
abstract class BExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
220 |
type Block = List[Stmt]
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
221 |
case object Skip extends Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
222 |
case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
223 |
case class While(b: BExp, bl: Block) extends Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
224 |
case class Assign(s: String, a: AExp) extends Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
225 |
case class Write(s: String) extends Stmt
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
226 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
227 |
case class Var(s: String) extends AExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
228 |
case class Num(i: Int) extends AExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
229 |
case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
230 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
231 |
case object True extends BExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
232 |
case object False extends BExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
233 |
case class Relop(o: String, a1: AExp, a2: AExp) extends BExp
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
234 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
235 |
// atomic parsers
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
236 |
case class TokParser(tok: Token) extends Parser[List[Token], Token] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
237 |
def parse(ts: List[Token]) = ts match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
238 |
case t::ts if (t == tok) => Set((t, ts))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
239 |
case _ => Set ()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
240 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
241 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
242 |
implicit def token2tparser(t: Token) = TokParser(t)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
243 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
244 |
case object NumParser extends Parser[List[Token], Int] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
245 |
def parse(ts: List[Token]) = ts match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
246 |
case T_NUM(s)::ts => Set((s.toInt, ts))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
247 |
case _ => Set ()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
248 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
249 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
250 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
251 |
case object IdParser extends Parser[List[Token], String] {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
252 |
def parse(ts: List[Token]) = ts match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
253 |
case T_ID(s)::ts => Set((s, ts))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
254 |
case _ => Set ()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
255 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
256 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
257 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
258 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
259 |
// arithmetic expressions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
260 |
lazy val AExp: Parser[List[Token], AExp] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
261 |
(T ~ T_OP("+") ~ AExp) ==> { case ((x, y), z) => Aop("+", x, z): AExp } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
262 |
(T ~ T_OP("-") ~ AExp) ==> { case ((x, y), z) => Aop("-", x, z): AExp } || T
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
263 |
lazy val T: Parser[List[Token], AExp] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
264 |
(F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => Aop("*", x, z): AExp } || F
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
265 |
lazy val F: Parser[List[Token], AExp] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
266 |
(T_LPAREN ~> AExp <~ T_RPAREN) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
267 |
IdParser ==> Var ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
268 |
NumParser ==> Num
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
269 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
270 |
// boolean expressions
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
271 |
lazy val BExp: Parser[List[Token], BExp] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
272 |
(T_KWD("true") ==> ((_) => True: BExp)) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
273 |
(T_KWD("false") ==> ((_) => False: BExp)) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
274 |
(T_LPAREN ~> BExp <~ T_RPAREN) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
275 |
(AExp ~ T_OP("=") ~ AExp) ==> { case ((x, y), z) => Relop("=", x, z): BExp } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
276 |
(AExp ~ T_OP("!=") ~ AExp) ==> { case ((x, y), z) => Relop("!=", x, z): BExp } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
277 |
(AExp ~ T_OP("<") ~ AExp) ==> { case ((x, y), z) => Relop("<", x, z): BExp } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
278 |
(AExp ~ T_OP(">") ~ AExp) ==> { case ((x, y), z) => Relop("<", z, x): BExp }
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
279 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
280 |
lazy val Stmt: Parser[List[Token], Stmt] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
281 |
(T_KWD("skip") ==> ((_) => Skip: Stmt)) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
282 |
(IdParser ~ T_OP(":=") ~ AExp) ==> { case ((x, y), z) => Assign(x, z): Stmt } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
283 |
(T_KWD("if") ~ BExp ~ T_KWD("then") ~ Block ~ T_KWD("else") ~ Block) ==>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
284 |
{ case (((((x,y),z),u),v),w) => If(y, u, w): Stmt } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
285 |
(T_KWD("while") ~ BExp ~ T_KWD("do") ~ Block) ==> { case (((x, y), z), w) => While(y, w) } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
286 |
(T_KWD("write") ~ IdParser) ==> { case (x, y) => Write(y) }
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
287 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
288 |
lazy val Stmts: Parser[List[Token], Block] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
289 |
(Stmt ~ T_SEMI ~ Stmts) ==> { case ((x, y), z) => x :: z : Block } ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
290 |
(Stmt ==> ((s) => List(s) : Block))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
291 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
292 |
lazy val Block: Parser[List[Token], Block] =
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
293 |
(T_BEGIN ~> Stmts <~ T_END) ||
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
294 |
(Stmt ==> ((s) => List(s)))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
295 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
296 |
// compiler
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
297 |
val beginning = """
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
298 |
.class public XXX.XXX
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
299 |
.super java/lang/Object
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
300 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
301 |
.method public <init>()V
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
302 |
aload_0
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
303 |
invokenonvirtual java/lang/Object/<init>()V
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
304 |
return
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
305 |
.end method
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
306 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
307 |
.method public static write(I)V
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
308 |
.limit locals 5
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
309 |
.limit stack 5
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
310 |
iload 0
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
311 |
getstatic java/lang/System/out Ljava/io/PrintStream;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
312 |
swap
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
313 |
invokevirtual java/io/PrintStream/println(I)V
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
314 |
return
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
315 |
.end method
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
316 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
317 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
318 |
.method public static main([Ljava/lang/String;)V
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
319 |
.limit locals 200
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
320 |
.limit stack 200
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
321 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
322 |
"""
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
323 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
324 |
val ending = """
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
325 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
326 |
return
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
327 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
328 |
.end method
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
329 |
"""
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
330 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
331 |
// for generating new labels
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
332 |
var counter = -1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
333 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
334 |
def Fresh(x: String) = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
335 |
counter += 1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
336 |
x ++ "_" ++ counter.toString()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
337 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
338 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
339 |
type Env = Map[String, String]
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
340 |
type Instrs = List[String]
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
341 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
342 |
def compile_aexp(a: AExp, env : Env) : Instrs = a match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
343 |
case Num(i) => List("ldc " + i.toString + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
344 |
case Var(s) => List("iload " + env(s) + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
345 |
case Aop("+", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("iadd\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
346 |
case Aop("-", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("isub\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
347 |
case Aop("*", a1, a2) => compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("imul\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
348 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
349 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
350 |
def compile_bexp(b: BExp, env : Env, jmp: String) : Instrs = b match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
351 |
case True => Nil
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
352 |
case False => List("goto " + jmp + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
353 |
case Relop("=", a1, a2) =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
354 |
compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpne " + jmp + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
355 |
case Relop("!=", a1, a2) =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
356 |
compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpeq " + jmp + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
357 |
case Relop("<", a1, a2) =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
358 |
compile_aexp(a1, env) ++ compile_aexp(a2, env) ++ List("if_icmpge " + jmp + "\n")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
359 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
360 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
361 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
362 |
def compile_stmt(s: Stmt, env: Env) : (Instrs, Env) = s match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
363 |
case Skip => (Nil, env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
364 |
case Assign(x, a) => {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
365 |
val index = if (env.isDefinedAt(x)) env(x) else env.keys.size.toString
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
366 |
(compile_aexp(a, env) ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
367 |
List("istore " + index + "\n"), env + (x -> index))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
368 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
369 |
case If(b, bl1, bl2) => {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
370 |
val if_else = Fresh("If_else")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
371 |
val if_end = Fresh("If_end")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
372 |
val (instrs1, env1) = compile_bl(bl1, env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
373 |
val (instrs2, env2) = compile_bl(bl2, env1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
374 |
(compile_bexp(b, env, if_else) ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
375 |
instrs1 ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
376 |
List("goto " + if_end + "\n") ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
377 |
List("\n" + if_else + ":\n\n") ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
378 |
instrs2 ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
379 |
List("\n" + if_end + ":\n\n"), env2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
380 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
381 |
case While(b, bl) => {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
382 |
val loop_begin = Fresh("Loop_begin")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
383 |
val loop_end = Fresh("Loop_end")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
384 |
val (instrs1, env1) = compile_bl(bl, env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
385 |
(List("\n" + loop_begin + ":\n\n") ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
386 |
compile_bexp(b, env, loop_end) ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
387 |
instrs1 ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
388 |
List("goto " + loop_begin + "\n") ++
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
389 |
List("\n" + loop_end + ":\n\n"), env1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
390 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
391 |
case Write(x) =>
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
392 |
(List("iload " + env(x) + "\n" + "invokestatic XXX/XXX/write(I)V\n"), env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
393 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
394 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
395 |
def compile_bl(bl: Block, env: Env) : (Instrs, Env) = bl match {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
396 |
case Nil => (Nil, env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
397 |
case s::bl => {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
398 |
val (instrs1, env1) = compile_stmt(s, env)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
399 |
val (instrs2, env2) = compile_bl(bl, env1)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
400 |
(instrs1 ++ instrs2, env2)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
401 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
402 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
403 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
404 |
def compile(input: String) : String = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
405 |
val class_name = input.split('.')(0)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
406 |
val tks = Tok.fromFile(input)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
407 |
val ast = Stmts.parse_single(tks)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
408 |
val instructions = compile_bl(ast, Map.empty)._1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
409 |
(beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
410 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
411 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
412 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
413 |
def compile_to(input: String, output: String) = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
414 |
val fw = new java.io.FileWriter(output)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
415 |
fw.write(compile(input))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
416 |
fw.close()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
417 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
418 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
419 |
//
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
420 |
val tks = Tok.fromString("x := x + 1")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
421 |
val ast = Stmt.parse_single(tks)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
422 |
println(compile_stmt(ast, Map("x" -> "n"))._1.mkString)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
423 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
424 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
425 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
426 |
//examples
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
427 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
428 |
compile_to("loops.while", "loops.j")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
429 |
//compile_to("fib.while", "fib.j")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
430 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
431 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
432 |
// testing cases for time measurements
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
433 |
/*
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
434 |
def time_needed[T](i: Int, code: => T) = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
435 |
val start = System.nanoTime()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
436 |
for (j <- 1 to i) code
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
437 |
val end = System.nanoTime()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
438 |
(end - start)/(i * 1.0e9)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
439 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
440 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
441 |
// for testing
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
442 |
import scala.sys.process._
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
443 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
444 |
val test_prog = """
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
445 |
start := XXX;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
446 |
x := start;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
447 |
y := start;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
448 |
z := start;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
449 |
while 0 < x do {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
450 |
while 0 < y do {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
451 |
while 0 < z do {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
452 |
z := z - 1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
453 |
};
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
454 |
z := start;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
455 |
y := y - 1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
456 |
};
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
457 |
y := start;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
458 |
x := x - 1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
459 |
};
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
460 |
write x;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
461 |
write y;
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
462 |
write z
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
463 |
"""
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
464 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
465 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
466 |
def compile_test(n: Int) : Unit = {
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
467 |
val class_name = "LOOP"
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
468 |
val tks = Tok.fromString(test_prog.replaceAllLiterally("XXX", n.toString))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
469 |
val ast = Stmts.parse_single(tks)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
470 |
val instructions = compile_bl(ast, Map.empty)._1
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
471 |
val assembly = (beginning ++ instructions.mkString ++ ending).replaceAllLiterally("XXX", class_name)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
472 |
val fw = new java.io.FileWriter(class_name + ".j")
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
473 |
fw.write(assembly)
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
474 |
fw.close()
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
475 |
val test = ("java -jar jvm/jasmin-2.4/jasmin.jar " + class_name + ".j").!!
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
476 |
println(n + " " + time_needed(2, ("java " + class_name + "/" + class_name).!!))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
477 |
}
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
478 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
479 |
List(1, 5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000).map(compile_test(_))
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
480 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
481 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
482 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
483 |
// javabyte code assmbler
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
484 |
//
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
485 |
// java -jar jvm/jasmin-2.4/jasmin.jar loops.j
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
486 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
487 |
*/
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
488 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
489 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
490 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
491 |
|
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff
changeset
|
492 |
|