// call with
//
// scala tokenise.scala fib.while
//
// scala tokenise.scala loops.while
//
// this will generate a .tks file that can be deserialised back
// into a list of tokens
//
// you can add -Xno-patmat-analysis in order to get rid of the
// match-not-exhaustive warning

object Tokenise {

import scala.language.implicitConversions
import scala.language.reflectiveCalls

def lexing_simp(r: Rexp, s: String) =
  env(lex_simp(r, s.toList))
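
// lexing_simp returns the environment of the lexed value, i.e. a list
// of (label, lexeme) pairs; for instance, with the record labels used
// in WHILE_REGS below, lexing_simp(WHILE_REGS, "x := 3") should yield
// pairs roughly like ("i","x"), ("w"," "), ("o",":="), ("w"," "), ("n","3")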


// The Lexing Rules for the WHILE Language

// inefficient representations for some extended regular
// expressions
def PLUS(r: Rexp) = r ~ r.%
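// for example, PLUS(CHAR('a')) unfolds to CHAR('a') ~ CHAR('a').%,
// i.e. one 'a' followed by zero or more 'a's; duplicating r like this
// is what makes the representation inefficient for larger expressions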

def Range(s : List[Char]) : Rexp = s match {
  case Nil => ZERO
  case c::Nil => CHAR(c)
221 ("p" $ (LPAREN | RPAREN)) | |
228 ("p" $ (LPAREN | RPAREN)) | |
222 ("w" $ WHITESPACE)).% |
229 ("w" $ WHITESPACE)).% |



// Generate tokens for the WHILE language
// and serialise them into a .tks file

import java.io._

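// tokens extend Serializable so that a List[Token] can be written to
// disk with ObjectOutputStream in serialise below and deserialised
// back into a list of tokens later on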
abstract class Token extends Serializable
case object T_SEMI extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
case class T_NUM(n: Int) extends Token
case class T_KWD(s: String) extends Token
case class T_STR(s: String) extends Token

// transforms pairs into tokens
val token : PartialFunction[(String, String), Token] = {
  case ("s", _) => T_SEMI
  case ("p", "{") => T_LPAREN
  case ("p", "}") => T_RPAREN
  case ("i", s) => T_ID(s)
  case ("o", s) => T_OP(s)
  case ("n", s) => T_NUM(s.toInt)
  case ("k", s) => T_KWD(s)
  case ("str", s) => T_STR(s)
}
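// for instance, token(("k", "while")) gives T_KWD("while"), whereas a
// whitespace pair such as ("w", " ") has no matching case and is
// therefore dropped when the partial function is used with collect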

// filters out all uninteresting tokens
def tokenise(s: String) : List[Token] =
  lexing_simp(WHILE_REGS, s).collect(token)
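
// as an illustration (the exact output depends on the definitions in
// WHILE_REGS), a call like
//   tokenise("x := x + 1;")
// should produce something along the lines of
//   List(T_ID("x"), T_OP(":="), T_ID("x"), T_OP("+"), T_NUM(1), T_SEMI)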


def serialise[T](fname: String, data: T) = {
  import scala.util.Using
  Using(new ObjectOutputStream(new FileOutputStream(fname))) {
    out => out.writeObject(data)
  }
}
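
// a possible counterpart to serialise (a sketch of one way to read a
// .tks file back; it assumes the file was written by serialise above
// and uses the same java.io machinery)
def deserialise[T](fname: String) : T = {
  import scala.util.Using
  Using(new ObjectInputStream(new FileInputStream(fname))) {
    in => in.readObject.asInstanceOf[T]
  }.get
}

// e.g. val tks = deserialise[List[Token]]("fib.tks")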

def main(args: Array[String]) : Unit = {
  val fname = args(0)
  val tname = fname.stripSuffix(".while") ++ ".tks"
  val file = io.Source.fromFile(fname).mkString
  serialise(tname, tokenise(file))
}


}