solutions/cw2/lexer.sc
changeset 920 7af2eea19646
parent 919 53f08d873e09
child 921 bb54e7aa1a3f
equal deleted inserted replaced
919:53f08d873e09 920:7af2eea19646
     1 import scala.language.implicitConversions    
     1 // CW 2
     2 import scala.language.reflectiveCalls
     2 //======
       
     3 
       
     4 
     3 
     5 
     4 // Rexp
     6 // Rexp
     5 abstract class Rexp
     7 abstract class Rexp
     6 case object ZERO extends Rexp
     8 case object ZERO extends Rexp
     7 case object ONE extends Rexp
     9 case object ONE extends Rexp
    25 case class Right(v: Val) extends Val
    27 case class Right(v: Val) extends Val
    26 case class Stars(vs: List[Val]) extends Val
    28 case class Stars(vs: List[Val]) extends Val
    27 case class Rec(x: String, v: Val) extends Val
    29 case class Rec(x: String, v: Val) extends Val
    28 
    30 
    29 
    31 
    30 // Convenience typing
    32 // Convenience for typing
    31 def charlist2rexp(s : List[Char]): Rexp = s match {
    33 def charlist2rexp(s : List[Char]): Rexp = s match {
    32   case Nil => ONE
    34   case Nil => ONE
    33   case c::Nil => CHAR(c)
    35   case c::Nil => CHAR(c)
    34   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    36   case c::s => SEQ(CHAR(c), charlist2rexp(s))
    35 }
    37 }
   205 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
   207 def lexing_simp(r: Rexp, s: String) = env(lex_simp(r, s.toList))
   206 
   208 
   207 // Language specific code
   209 // Language specific code
   208 val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" 
   210 val KEYWORD : Rexp = "while" | "if" | "then" | "else" | "do" | "for" | "to" | "true" | "false" | "read" | "write" | "skip" 
   209 val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||"
   211 val OP : Rexp = "+" | "-" | "*" | "%" | "/" | "==" | "!=" | ">" | "<" | ">=" | "<=" | ":=" | "&&" | "||"
   210 val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z'))
   212 val LET: Rexp = RANGE(('A' to 'Z').toSet ++ ('a' to 'z').toSet)
   211 val SYM : Rexp = LET | RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':', ')', '('))
   213 val SYM : Rexp = RANGE(Set('.', '_', '>', '<', '=', ';', ',', ':'))
   212 val PARENS : Rexp = "(" | "{" | ")" | "}"
   214 val PARENS : Rexp = "(" | "{" | ")" | "}"
   213 val SEMI : Rexp = ";"
   215 val SEMI : Rexp = ";"
   214 val WHITESPACE : Rexp = PLUS(" ") | "\n" | "\t" | "\r"
   216 val WHITESPACE : Rexp = PLUS(" ") | "\n" | "\t" | "\r"
   215 val DIGIT : Rexp = RANGE(('0' to '9').toSet)
   217 val DIGIT : Rexp = RANGE(('0' to '9').toSet)
   216 val DIGIT1 : Rexp = RANGE(('1' to '9').toSet)
   218 val DIGIT1 : Rexp = RANGE(('1' to '9').toSet)
   217 val STRING : Rexp = "\"" ~ (SYM | " " | "\\n" | DIGIT).% ~ "\""
   219 val STRING : Rexp = "\"" ~ (LET | DIGIT | SYM | PARENS | " " | "\\n").% ~ "\""
   218 val ID : Rexp = LET ~ (LET | "_" | DIGIT).%
   220 val ID : Rexp = LET ~ (LET | "_" | DIGIT).%
   219 val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%)
   221 val NUM : Rexp = "0" | (DIGIT1 ~ DIGIT.%)
   220 val COMMENT : Rexp = "//" ~ (SYM | " " | DIGIT).% ~ ("\n" | "\r\n") 
   222 val EOL : Rexp = "\n" | "\r\n"
       
   223 val COMMENT : Rexp = "//" ~ (LET | DIGIT | SYM | " ").% ~ EOL 
   221 
   224 
   222 val WHILE_REGS = (("k" $ KEYWORD) | 
   225 val WHILE_REGS = (("k" $ KEYWORD) | 
   223                   ("o" $ OP) | 
   226                   ("o" $ OP) | 
   224                   ("str" $ STRING) |
   227                   ("str" $ STRING) |
   225                   ("p" $ PARENS) |
   228                   ("p" $ PARENS) |
   226                   ("s" $ SEMI) | 
   229                   ("s" $ SEMI) | 
   227                   ("w" $ WHITESPACE) | 
   230                   ("w" $ WHITESPACE) | 
   228                   ("i" $ ID) | 
   231                   ("i" $ ID) | 
   229                   ("n" $ NUM) |
   232                   ("n" $ NUM) |
   230 		              ("c" $ COMMENT)).%
   233 		  ("c" $ COMMENT)).%
   231 
   234 
   232 def esc(raw: String): String = {
   235 def esc(raw: String): String = {
   233   import scala.reflect.runtime.universe._
   236   import scala.reflect.runtime.universe._
   234   Literal(Constant(raw)).toString
   237   Literal(Constant(raw)).toString
   235 }
   238 }
   261 def tokenise(s: String) : List[Token] = 
   264 def tokenise(s: String) : List[Token] = 
   262   lexing_simp(WHILE_REGS, s).collect(token)
   265   lexing_simp(WHILE_REGS, s).collect(token)
   263 
   266 
   264 
   267 
   265 // Q2 Tests
   268 // Q2 Tests
       
   269 
   266 lex_simp(NTIMES("a", 3), "aaa".toList)
   270 lex_simp(NTIMES("a", 3), "aaa".toList)
   267 lex_simp(NTIMES(("a" | ONE), 3), "aa".toList)
   271 lex_simp(NTIMES(("a" | ONE), 3), "aa".toList)
   268 
   272 
   269 // Q3 Programs
   273 // Q3 Programs
   270 
   274 
   310 println(tokenise(prog1))
   314 println(tokenise(prog1))
   311 println(tokenise(prog2))
   315 println(tokenise(prog2))
   312 println(tokenise(prog3))
   316 println(tokenise(prog3))
   313 
   317 
   314 
   318 
   315 println("MY TESTS")
   319 // More tests
   316 
   320 
   317 println(lex_simp("x" $ OPTIONAL("a"), "a".toList))
   321 println(lex_simp("x" $ OPTIONAL("a"), "a".toList))
   318 println(lex_simp("x" $ OPTIONAL("a"), "".toList))
   322 println(lex_simp("x" $ OPTIONAL("a"), "".toList))
   319 println(lex_simp("x" $ NTIMES(OPTIONAL("a"),4), "aa".toList))
   323 println(lex_simp("x" $ NTIMES(OPTIONAL("a"),4), "aa".toList))
   320 println(lex_simp("x" $ OPTIONAL("aa"), "aa".toList))
   324 println(lex_simp("x" $ OPTIONAL("aa"), "aa".toList))