4 // call with   | 
     4 // call with   | 
     5 //  | 
     5 //  | 
     6 //     scala tokenise.scala fib.while  | 
     6 //     scala tokenise.scala fib.while  | 
     7 //  | 
     7 //  | 
     8 //     scala tokenise.scala loops.while  | 
     8 //     scala tokenise.scala loops.while  | 
         | 
     9 //  | 
         | 
    10 // this will generate a .tks file that can be deserialised back  | 
         | 
    11 // into a list of tokens  | 
         | 
    12 // you can add -Xno-patmat-analysis in order to get rid of the  | 
         | 
    13 // match-not-exhaustive warning  | 
     9   | 
    14   | 
    10 object Tokenise { | 
    15 object Tokenise { | 
    11   | 
    16   | 
    12 import scala.language.implicitConversions      | 
    17 import scala.language.implicitConversions      | 
    13 import scala.language.reflectiveCalls  | 
    18 import scala.language.reflectiveCalls  | 
   186   | 
   191   | 
   187 def lexing_simp(r: Rexp, s: String) =   | 
   192 def lexing_simp(r: Rexp, s: String) =   | 
   188   env(lex_simp(r, s.toList))  | 
   193   env(lex_simp(r, s.toList))  | 
   189   | 
   194   | 
   190   | 
   195   | 
   191 // The Lexing Rules for the Fun Language  | 
   196 // The Lexing Rules for the WHILE Language  | 
   192   | 
   197   | 
         | 
   198 // inefficient representations for some extended regular  | 
         | 
   199 // expressions  | 
   193 def PLUS(r: Rexp) = r ~ r.%  | 
   200 def PLUS(r: Rexp) = r ~ r.%  | 
   194   | 
   201   | 
   195 def Range(s : List[Char]) : Rexp = s match { | 
   202 def Range(s : List[Char]) : Rexp = s match { | 
   196   case Nil => ZERO  | 
   203   case Nil => ZERO  | 
   197   case c::Nil => CHAR(c)  | 
   204   case c::Nil => CHAR(c)  | 
   221                   ("p" $ (LPAREN | RPAREN)) |  | 
   228                   ("p" $ (LPAREN | RPAREN)) |  | 
   222                   ("w" $ WHITESPACE)).% | 
   229                   ("w" $ WHITESPACE)).% | 
   223   | 
   230   | 
   224   | 
   231   | 
   225   | 
   232   | 
   226 // Generating tokens for the WHILE language  | 
   233 // Generate tokens for the WHILE language  | 
   227 // and serialising them into a .tks file  | 
   234 // and serialise them into a .tks file  | 
   228   | 
   235   | 
   229 import java.io._  | 
   236 import java.io._  | 
   230   | 
   237   | 
   231 abstract class Token extends Serializable   | 
   238 abstract class Token extends Serializable   | 
   232 case object T_SEMI extends Token  | 
   239 case object T_SEMI extends Token  | 
   236 case class T_OP(s: String) extends Token  | 
   243 case class T_OP(s: String) extends Token  | 
   237 case class T_NUM(n: Int) extends Token  | 
   244 case class T_NUM(n: Int) extends Token  | 
   238 case class T_KWD(s: String) extends Token  | 
   245 case class T_KWD(s: String) extends Token  | 
   239 case class T_STR(s: String) extends Token  | 
   246 case class T_STR(s: String) extends Token  | 
   240   | 
   247   | 
         | 
   248 // transforms pairs into tokens  | 
   241 val token : PartialFunction[(String, String), Token] = { | 
   249 val token : PartialFunction[(String, String), Token] = { | 
   242   case ("s", _) => T_SEMI | 
   250   case ("s", _) => T_SEMI | 
   243   case ("p", "{") => T_LPAREN | 
   251   case ("p", "{") => T_LPAREN | 
   244   case ("p", "}") => T_RPAREN | 
   252   case ("p", "}") => T_RPAREN | 
   245   case ("i", s) => T_ID(s) | 
   253   case ("i", s) => T_ID(s) | 
   247   case ("n", s) => T_NUM(s.toInt) | 
   255   case ("n", s) => T_NUM(s.toInt) | 
   248   case ("k", s) => T_KWD(s) | 
   256   case ("k", s) => T_KWD(s) | 
   249   case ("str", s) => T_STR(s) | 
   257   case ("str", s) => T_STR(s) | 
   250 }  | 
   258 }  | 
   251   | 
   259   | 
         | 
   260 // filters out all un-interesting token  | 
   252 def tokenise(s: String) : List[Token] =   | 
   261 def tokenise(s: String) : List[Token] =   | 
   253   lexing_simp(WHILE_REGS, s).collect(token)  | 
   262   lexing_simp(WHILE_REGS, s).collect(token)  | 
   254   | 
   263   | 
   255   | 
   264   | 
   256 def serialise[T](fname: String, data: T) = { | 
   265 def serialise[T](fname: String, data: T) = { | 
   257   val out = new ObjectOutputStream(new FileOutputStream(fname))  | 
   266   import scala.util.Using  | 
   258   out.writeObject(data)  | 
   267   Using(new ObjectOutputStream(new FileOutputStream(fname))) { | 
   259   out.close  | 
   268     out => out.writeObject(data)  | 
   260 }  | 
   269   }  | 
   261   | 
   270 }  | 
   262 def main(args: Array[String]) = { | 
   271   | 
         | 
   272 def main(args: Array[String]) : Unit = { | 
   263   val fname = args(0)  | 
   273   val fname = args(0)  | 
         | 
   274   val tname = fname.stripSuffix(".while") ++ ".tks" | 
   264   val file = io.Source.fromFile(fname).mkString  | 
   275   val file = io.Source.fromFile(fname).mkString  | 
   265   val tks = fname.stripSuffix(".while") ++ ".tks" | 
   276   serialise(tname, tokenise(file))  | 
   266   serialise(tks, tokenise(file))  | 
   277 }  | 
   267 }  | 
   278   | 
   268   | 
   279   | 
   269   | 
   280 }  | 
   270 }  | 
         |