progs/token.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 17 Nov 2014 08:38:52 +0000
changeset 308 3703ade9b17c
parent 165 66b699c80479
child 352 1e1b0fe66107
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
164
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
import scala.language.implicitConversions
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
import scala.language.reflectiveCalls
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
import scala.util._
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
import scala.annotation.tailrec
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
// Abstract syntax of regular expressions. The hierarchy is sealed, so all
// constructors live in this file and matches over Rexp can be checked for
// exhaustiveness.
sealed abstract class Rexp

// the regular expression matching no string at all
case object NULL extends Rexp
// the regular expression matching only the empty string
case object EMPTY extends Rexp
// matches exactly the single character c
case class CHAR(c: Char) extends Rexp
// alternative: r1 or r2
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
// sequence: r1 followed by r2
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
// Kleene star: zero or more repetitions of r
case class STAR(r: Rexp) extends Rexp
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    14
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    15
// Builds the regular expression that matches exactly the given character
// sequence: EMPTY for the empty list, a bare CHAR for a single character,
// and otherwise a right-nested chain SEQ(c1, SEQ(c2, ... cn)).
def charlist2rexp(s : List[Char]) : Rexp = {
  val atoms: List[Rexp] = s.map(c => CHAR(c))
  // reduceRight nests to the right, exactly like the recursive definition
  atoms.reduceRightOption[Rexp]((hd, tl) => SEQ(hd, tl)).getOrElse(EMPTY)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    20
// Implicit view that lets a plain String be used wherever a Rexp is
// expected; the string is converted character by character.
implicit def string2rexp(s : String) : Rexp = {
  val chars = s.toList
  charlist2rexp(chars)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    22
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    23
// Operator syntax for combining regular expressions:
//   r | s   alternative  -> ALT(r, s)
//   r.%     star         -> STAR(r)
//   r ~ s   sequence     -> SEQ(r, s)
//
// This is an implicit class rather than `implicit def ... = new { ... }`.
// The anonymous-object form gives the result a structural type, so every
// operator call was dispatched through reflection; here the operators are
// ordinary statically resolved methods. The name RexpOps and the members
// are unchanged, so existing uses (including explicit RexpOps(r)) still work.
implicit class RexpOps(r: Rexp) {
  def | (s: Rexp): ALT = ALT(r, s)
  def % : STAR = STAR(r)
  def ~ (s: Rexp): SEQ = SEQ(r, s)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    28
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    29
// Operator syntax for expressions whose left operand is a plain String:
//   s | r   alternative  -> ALT(s, r)
//   s.%     star         -> STAR(s)
//   s ~ r   sequence     -> SEQ(s, r)
// The String operands are turned into Rexp values by the implicit
// string2rexp view when they are passed to the constructors.
//
// This is an implicit class rather than `implicit def ... = new { ... }`.
// The anonymous-object form gives the result a structural type, so every
// operator call was dispatched through reflection; here the operators are
// ordinary statically resolved methods. Name and members are unchanged.
implicit class stringOps(s: String) {
  def | (r: Rexp): ALT = ALT(s, r)
  def | (r: String): ALT = ALT(s, r)
  def % : STAR = STAR(s)
  def ~ (r: Rexp): SEQ = SEQ(s, r)
  def ~ (r: String): SEQ = SEQ(s, r)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    36
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    37
// Builds the regular expression matching any single character from the
// list: NULL for the empty list, a bare CHAR for one character, and
// otherwise a right-nested chain ALT(c1, ALT(c2, ... cn)).
def Range(s : List[Char]) : Rexp =
  if (s.isEmpty) NULL
  else {
    // start from the last character and wrap the earlier ones around it
    val seed: Rexp = CHAR(s.last)
    s.init.foldRight(seed)((c, rest) => ALT(CHAR(c), rest))
  }
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    42
// String front end for Range: matches any one character occurring in s.
def RANGE(s: String): Rexp = {
  val chars = s.toList
  Range(chars)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    43
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    44
// One or more repetitions of r, expressed as r followed by r*.
def PLUS(r: Rexp): SEQ = {
  val repeated = STAR(r)
  SEQ(r, repeated)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    45
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    46
// Characters allowed in identifiers: A-Z, a-z and the underscore.
// The alphabet is generated from character ranges instead of being typed
// out by hand: the previous literal skipped the upper-case 'W', so any
// identifier containing it could not be lexed.
val SYM = RANGE(('A' to 'Z').mkString + ('a' to 'z').mkString + "_")
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    47
// Token classes of the lexer, written with the constructors directly.

// a single decimal digit
val DIGIT = RANGE(('0' to '9').mkString)
// identifiers: a symbol character followed by any number of symbols or digits
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
// numbers: one or more digits
val NUM = PLUS(DIGIT)
// reserved words, combined into a left-nested alternative
val KEYWORD : Rexp =
  List("skip", "while", "do", "if", "then", "else", "read", "write")
    .map(string2rexp)
    .reduceLeft[Rexp]((acc, kw) => ALT(acc, kw))
val SEMI: Rexp = string2rexp(";")
// operators, combined into a left-nested alternative
val OP: Rexp =
  List(":=", "=", "-", "+", "*", "!=", "<", ">")
    .map(string2rexp)
    .reduceLeft[Rexp]((acc, op) => ALT(acc, op))
// one or more blanks or newlines
val WHITESPACE = PLUS(RANGE(" \n"))
val RPAREN: Rexp = string2rexp(")")
val LPAREN: Rexp = string2rexp("(")
val BEGIN: Rexp = string2rexp("{")
val END: Rexp = string2rexp("}")
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    58
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    59
// The token classes handed to the lexer. The position in the list is the
// rank of a regular expression: keywords come first, whitespace last.
val regs: List[Rexp] =
  KEYWORD :: ID :: OP :: NUM :: SEMI :: LPAREN :: RPAREN :: BEGIN :: END :: WHITESPACE :: Nil
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    62
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    63
// Tests whether the regular expression r can match the empty string.
def nullable (r: Rexp) : Boolean = r match {
  case EMPTY | STAR(_) => true
  case NULL | CHAR(_) => false
  // a sequence needs both parts to accept the empty string
  case SEQ(fst, snd) => nullable(fst) && nullable(snd)
  // an alternative needs only one of them
  case ALT(lhs, rhs) => nullable(lhs) || nullable(rhs)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    71
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    72
// Structural test for whether r is certain to match no string at all,
// i.e. whether it is NULL or collapses to NULL.
def zeroable (r: Rexp) : Boolean = r match {
  case NULL => true
  case EMPTY | CHAR(_) | STAR(_) => false
  // a sequence is dead as soon as one part is dead
  case SEQ(fst, snd) => zeroable(fst) || zeroable(snd)
  // an alternative is dead only if both branches are
  case ALT(lhs, rhs) => zeroable(lhs) && zeroable(rhs)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    80
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    81
// Derivative of r with respect to the character c: the regular expression
// for what may still follow after c has been consumed.
def der (c: Char, r: Rexp) : Rexp = r match {
  case NULL | EMPTY => NULL
  case CHAR(d) => if (d == c) EMPTY else NULL
  case ALT(lhs, rhs) => ALT(der(c, lhs), der(c, rhs))
  case SEQ(fst, snd) =>
    val stepped = SEQ(der(c, fst), snd)
    // if the first part can be skipped, c may also start the second part
    if (nullable(fst)) ALT(stepped, der(c, snd)) else stepped
  case STAR(body) => SEQ(der(c, body), STAR(body))
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    91
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    92
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    93
// Longest-match scan: starting at index pos of s, repeatedly takes the
// derivative of every regular expression in rs with respect to the next
// character, dropping those that have become zeroable, until no regular
// expression is left or the input is exhausted.
//
//   s    - the input characters
//   pos  - index in s at which scanning starts
//   rs   - the regular expressions still in play
//   last - index of the last character seen so far at which some regular
//          expression matched (None if there was none)
//
// Returns the index of the last character at which at least one regular
// expression was nullable after consuming it, or `last` if there was none.
//
// The input is walked once through a local helper. Calling s.length and
// s(pos) on a List at every step, as before, is linear each time and made
// a scan quadratic in the length of the input.
def munch(s: List[Char], 
          pos: Int, 
          rs: List[Rexp], 
          last: Option[Int]): Option[Int] = {
  @tailrec
  def go(rest: List[Char], i: Int, live: List[Rexp], best: Option[Int]): Option[Int] =
    rest match {
      case c :: cs if live.nonEmpty =>
        val ders = live.map(der(c, _))
        // remember this index if some derivative accepts the empty string
        val newBest = if (ders.exists(nullable)) Some(i) else best
        go(cs, i + 1, ders.filterNot(zeroable), newBest)
      case _ => best
    }

  if (rs.isEmpty) last
  // indexing the list with a negative position failed in the same way before
  else if (pos < 0) throw new IndexOutOfBoundsException(pos.toString)
  else go(s.drop(pos), pos, rs, last)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   109
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   110
// Splits s into tokens by running munch repeatedly from the start of the
// remaining input. Each token is printed between bars, with newlines shown
// as RET. Prints EOF once the input is used up, or a lexing error followed
// by the remaining input if no regular expression matches.
@tailrec
def tokenize(s: String, rs: List[Rexp]) : Unit =
  munch(s.toList, 0, rs, None) match {
    case Some(n) =>
      // n is the index of the last character of the token
      val cut = n + 1
      val shown = s.take(cut).replace("\n", "RET")
      print("|" + shown + "|")
      tokenize(s.drop(cut), rs)
    case None =>
      println(if (s == "") "EOF" else s"Lexing error: $s")
  }
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   122
165
66b699c80479 tuned progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 164
diff changeset
   123
164
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   124
// Sample WHILE program (three nested countdown loops) used to exercise
// the lexer.
val test_prog = """
start := XXX;
x := start;
y := start;
z := start;
while 0 < x do {
 while 0 < y do {
  while 0 < z do {
    z := z - 1
  };
  z := start;
  y := y - 1
 };     
 y := start;
 x := x - 1
};
write x;
write y;
write z
"""

// run the lexer over the sample program with all token classes
tokenize(test_prog, regs)