progs/tokenise.scala
changeset 645 30943d5491b6
parent 644 b4f5714485e1
child 670 551d018cbbac
equal deleted inserted replaced
644:b4f5714485e1 645:30943d5491b6
     4 // call with 
     4 // call with 
     5 //
     5 //
     6 //     scala tokenise.scala fib.while
     6 //     scala tokenise.scala fib.while
     7 //
     7 //
     8 //     scala tokenise.scala loops.while
     8 //     scala tokenise.scala loops.while
       
     9 //
       
    10 // this will generate a .tks file that can be deserialised back
       
    11 // into a list of tokens
       
    12 // you can add -Xno-patmat-analysis in order to get rid of the
       
    13 // match-not-exhaustive warning
     9 
    14 
    10 object Tokenise {
    15 object Tokenise {
    11 
    16 
    12 import scala.language.implicitConversions    
    17 import scala.language.implicitConversions    
    13 import scala.language.reflectiveCalls
    18 import scala.language.reflectiveCalls
   186 
   191 
   187 def lexing_simp(r: Rexp, s: String) = 
   192 def lexing_simp(r: Rexp, s: String) = 
   188   env(lex_simp(r, s.toList))
   193   env(lex_simp(r, s.toList))
   189 
   194 
   190 
   195 
   191 // The Lexing Rules for the Fun Language
   196 // The Lexing Rules for the WHILE Language
   192 
   197 
       
   198 // inefficient representations for some extended regular
       
   199 // expressions
   193 def PLUS(r: Rexp) = r ~ r.%
   200 def PLUS(r: Rexp) = r ~ r.%
   194 
   201 
   195 def Range(s : List[Char]) : Rexp = s match {
   202 def Range(s : List[Char]) : Rexp = s match {
   196   case Nil => ZERO
   203   case Nil => ZERO
   197   case c::Nil => CHAR(c)
   204   case c::Nil => CHAR(c)
   221                   ("p" $ (LPAREN | RPAREN)) | 
   228                   ("p" $ (LPAREN | RPAREN)) | 
   222                   ("w" $ WHITESPACE)).%
   229                   ("w" $ WHITESPACE)).%
   223 
   230 
   224 
   231 
   225 
   232 
   226 // Generating tokens for the WHILE language
   233 // Generate tokens for the WHILE language
   227 // and serialising them into a .tks file
   234 // and serialise them into a .tks file
   228 
   235 
   229 import java.io._
   236 import java.io._
   230 
   237 
   231 abstract class Token extends Serializable 
   238 abstract class Token extends Serializable 
   232 case object T_SEMI extends Token
   239 case object T_SEMI extends Token
   236 case class T_OP(s: String) extends Token
   243 case class T_OP(s: String) extends Token
   237 case class T_NUM(n: Int) extends Token
   244 case class T_NUM(n: Int) extends Token
   238 case class T_KWD(s: String) extends Token
   245 case class T_KWD(s: String) extends Token
   239 case class T_STR(s: String) extends Token
   246 case class T_STR(s: String) extends Token
   240 
   247 
       
   248 // transforms pairs into tokens
   241 val token : PartialFunction[(String, String), Token] = {
   249 val token : PartialFunction[(String, String), Token] = {
   242   case ("s", _) => T_SEMI
   250   case ("s", _) => T_SEMI
   243   case ("p", "{") => T_LPAREN
   251   case ("p", "{") => T_LPAREN
   244   case ("p", "}") => T_RPAREN
   252   case ("p", "}") => T_RPAREN
   245   case ("i", s) => T_ID(s)
   253   case ("i", s) => T_ID(s)
   247   case ("n", s) => T_NUM(s.toInt)
   255   case ("n", s) => T_NUM(s.toInt)
   248   case ("k", s) => T_KWD(s)
   256   case ("k", s) => T_KWD(s)
   249   case ("str", s) => T_STR(s)
   257   case ("str", s) => T_STR(s)
   250 }
   258 }
   251 
   259 
       
   260 // filters out all uninteresting tokens
   252 def tokenise(s: String) : List[Token] = 
   261 def tokenise(s: String) : List[Token] = 
   253   lexing_simp(WHILE_REGS, s).collect(token)
   262   lexing_simp(WHILE_REGS, s).collect(token)
   254 
   263 
   255 
   264 
   256 def serialise[T](fname: String, data: T) = {
   265 def serialise[T](fname: String, data: T) = {
   257   val out = new ObjectOutputStream(new FileOutputStream(fname))
   266   import scala.util.Using
   258   out.writeObject(data)
   267   Using(new ObjectOutputStream(new FileOutputStream(fname))) {
   259   out.close
   268     out => out.writeObject(data)
   260 }
   269   }
   261 
   270 }
   262 def main(args: Array[String]) = {
   271 
       
   272 def main(args: Array[String]) : Unit = {
   263   val fname = args(0)
   273   val fname = args(0)
       
   274   val tname = fname.stripSuffix(".while") ++ ".tks"
   264   val file = io.Source.fromFile(fname).mkString
   275   val file = io.Source.fromFile(fname).mkString
   265   val tks = fname.stripSuffix(".while") ++ ".tks"
   276   serialise(tname, tokenise(file))
   266   serialise(tks, tokenise(file))
   277 }
   267 }
   278 
   268 
   279 
   269 
   280 }
   270 }