progs/token.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sun, 27 Oct 2013 14:17:55 +0000
changeset 164 6c1d214c39ef
child 165 66b699c80479
permissions -rw-r--r--
added progs
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
164
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     1
import scala.language.implicitConversions
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     2
import scala.language.reflectiveCalls
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     3
import scala.util._
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     4
import scala.annotation.tailrec
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     5
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
     6
/** Abstract syntax of regular expressions.
  *
  * The hierarchy is sealed, so pattern matches over a `Rexp` are checked
  * for exhaustiveness by the compiler.
  */
sealed abstract class Rexp

/** Not nullable and always zeroable (see `nullable` / `zeroable`). */
case object NULL extends Rexp
/** Nullable and never zeroable. */
case object EMPTY extends Rexp
/** A single character `c`. */
case class CHAR(c: Char) extends Rexp
/** Alternative: `r1` or `r2`. */
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
/** Sequence: `r1` followed by `r2`. */
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
/** Star of `r`; always nullable. */
case class STAR(r: Rexp) extends Rexp
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    14
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    15
/** Turns a list of characters into the regular expression matching exactly
  * that character sequence.
  *
  * The empty list yields `EMPTY`, a single character yields `CHAR(c)`, and a
  * longer list yields right-nested sequences: `SEQ(c1, SEQ(c2, ...))`.
  */
def charlist2rexp(s : List[Char]) : Rexp = {
  val letters: List[Rexp] = s.map(CHAR(_))
  // Right fold without a seed, so the last character stays a bare CHAR.
  letters.reduceRightOption[Rexp](SEQ(_, _)).getOrElse(EMPTY)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    20
// Implicit conversion that lets a String be used wherever a Rexp is expected;
// the string is converted character by character via charlist2rexp.
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    21
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    22
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    23
/** Operator syntax for combining regular expressions.
  *
  * Declared as an implicit class rather than `implicit def ... = new { ... }`:
  * the anonymous-object form has a structural type, so every operator call
  * went through runtime reflection. The name and the operators' result types
  * are unchanged.
  *
  * @param r the left-hand regular expression
  */
implicit class RexpOps(r: Rexp) {
  /** Alternative: `r | s` is `ALT(r, s)`. */
  def | (s: Rexp): ALT = ALT(r, s)
  /** Star: `r.%` is `STAR(r)`. */
  def % : STAR = STAR(r)
  /** Sequence: `r ~ s` is `SEQ(r, s)`. */
  def ~ (s: Rexp): SEQ = SEQ(r, s)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    28
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    29
/** Operator syntax for building regular expressions with a String on the left.
  *
  * Declared as an implicit class rather than `implicit def ... = new { ... }`:
  * the anonymous-object form has a structural type, so every operator call
  * went through runtime reflection. The lower-case name is kept so existing
  * references to `stringOps` keep working.
  *
  * String operands are turned into regular expressions by the implicit
  * `string2rexp` conversion in scope.
  *
  * @param s the left-hand string
  */
implicit class stringOps(s: String) {
  /** Alternative of `s` and the regular expression `r`. */
  def | (r: Rexp): ALT = ALT(s, r)
  /** Alternative of `s` and the string `r`. */
  def | (r: String): ALT = ALT(s, r)
  /** Star of `s`. */
  def % : STAR = STAR(s)
  /** `s` followed by the regular expression `r`. */
  def ~ (r: Rexp): SEQ = SEQ(s, r)
  /** `s` followed by the string `r`. */
  def ~ (r: String): SEQ = SEQ(s, r)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    36
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    37
/** Builds the regular expression matching any one of the given characters.
  *
  * The empty list yields `NULL`, a single character yields `CHAR(c)`, and a
  * longer list yields right-nested alternatives: `ALT(c1, ALT(c2, ...))`.
  */
def Range(s : List[Char]) : Rexp = {
  val choices: List[Rexp] = s.map(CHAR(_))
  // Right fold without a seed, so the last character stays a bare CHAR.
  choices.reduceRightOption[Rexp](ALT(_, _)).getOrElse(NULL)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    42
// Character class: the alternative of all characters occurring in s (see Range).
def RANGE(s: String) = Range(s.toList)
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    43
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    44
// One or more repetitions of r, expressed as r followed by STAR(r).
def PLUS(r: Rexp) = SEQ(r, STAR(r))
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    45
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    46
// Characters allowed in identifiers: ASCII letters and underscore.
// (The upper-case alphabet previously lacked 'W', so identifiers containing
// that letter could not be lexed.)
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    47
// a single decimal digit
val DIGIT = RANGE("0123456789")
// identifiers: one SYM followed by any number of SYMs or DIGITs
val ID = SEQ(SYM, STAR(ALT(SYM, DIGIT)))
// numbers: one or more digits
val NUM = PLUS(DIGIT)
// reserved words
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
// statement separator
val SEMI: Rexp = ";"
// assignment, arithmetic and comparison operators
val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
// one or more spaces or newlines
val WHITESPACE = PLUS(RANGE(" \n"))
// parentheses
val RPAREN: Rexp = ")"
val LPAREN: Rexp = "("
// block delimiters
val BEGIN: Rexp = "{"
val END: Rexp = "}"
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    58
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    59
// The token classes handed to the tokenizer, ranked by their position in
// the list (munch records the first nullable derivative in this order).
val regs: List[Rexp] =
  KEYWORD :: ID :: OP :: NUM :: SEMI :: LPAREN :: RPAREN :: BEGIN :: END :: WHITESPACE :: Nil
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    62
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    63
/** Structural nullability check on `r`.
  *
  * `EMPTY` and every `STAR` are nullable; `NULL` and `CHAR` are not. An
  * alternative is nullable if either branch is, a sequence only if both
  * parts are.
  */
def nullable (r: Rexp) : Boolean = r match {
  case EMPTY | STAR(_) => true
  case NULL | CHAR(_) => false
  case ALT(left, right) => nullable(left) || nullable(right)
  case SEQ(first, second) => nullable(first) && nullable(second)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    71
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    72
/** Structural check whether `r` has collapsed to `NULL`.
  *
  * Only `NULL` itself is zeroable among the leaves. An alternative is
  * zeroable if both branches are, a sequence if either part is; a `STAR`
  * never is.
  */
def zeroable (r: Rexp) : Boolean = r match {
  case NULL => true
  case EMPTY | CHAR(_) | STAR(_) => false
  case ALT(left, right) => zeroable(left) && zeroable(right)
  case SEQ(first, second) => zeroable(first) || zeroable(second)
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    80
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    81
/** Derivative of the regular expression `r` with respect to the character `c`.
  *
  * @param c the character being consumed
  * @param r the regular expression to differentiate
  * @return the derivative, built without any simplification
  */
def der (c: Char, r: Rexp) : Rexp = r match {
  case NULL | EMPTY => NULL
  case CHAR(d) if c == d => EMPTY
  case CHAR(_) => NULL
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  case SEQ(first, rest) =>
    val afterFirst = SEQ(der(c, first), rest)
    // When the first part is nullable, c may also be consumed by the second part.
    if (nullable(first)) ALT(afterFirst, der(c, rest)) else afterFirst
  case STAR(body) => SEQ(der(c, body), STAR(body))
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    91
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    92
// Returns how many elements remain in the stack from its first Some-entry
// (counted from the head) to the end; 0 if the stack holds no Some at all.
// munch pushes one entry per consumed character onto the front, so this is
// the number of characters consumed when a nullable derivative was last seen.
def last(stack: List[Option[Rexp]]) : Int =
  stack.dropWhile(_.isEmpty).length
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    98
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
    99
// Consumes characters of cs, taking derivatives of all regular expressions
// in rs, until either the input is exhausted or every derivative is zeroable.
//
// For each consumed character, one entry is pushed onto stack: Some(d) with
// the first nullable derivative d (in the order of rs), or None if no
// derivative is nullable. The result is last(stack).
@tailrec
def munch(cs: List[Char], rs: List[Rexp], stack: List[Option[Rexp]]) : Int = (cs, rs) match {
  case (_, Nil) => last(stack)
  case (Nil, _) => last(stack)
  case (c::rest, _) => {
    val ds = rs.map(der(c, _))
    val rs_nzero = ds.filterNot(zeroable(_))
    // First nullable derivative, if any; `find` replaces the former
    // Try(...head...) construct, which used an exception for the empty case.
    val opt = ds.find(nullable(_))
    munch(rest, rs_nzero, opt::stack)
  }
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   112
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   113
// Repeatedly applies munch to the remaining input and prints each munched
// component between bars (newlines shown as "Ret"). Prints "EOF" once the
// input is used up, or a lexing error with the remaining input if munch
// cannot consume anything.
@tailrec
def tokenize(s: String, rs: List[Rexp]) : Unit = {
  val consumed = munch(s.toList, rs, Nil)
  if (consumed != 0) {
    val lexeme = s.take(consumed)
    print(s"|${lexeme.replaceAll("\n","Ret")}|")
    tokenize(s.drop(consumed), rs)
  }
  else if (s == "") println("EOF")
  else println(s"Lexing error: $s")
}
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   125
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   126
// Sample while-program used as input for the tokenizer below.
val test_prog = """
start := XXX;
x := start;
y := start;
z := start;
while 0 < x do {
 while 0 < y do {
  while 0 < z do {
    z := z - 1
  };
  z := start;
  y := y - 1
 };     
 y := start;
 x := x - 1
};
write x;
write y;
write z
"""
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   146
6c1d214c39ef added progs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents:
diff changeset
   147
// Tokenize the sample program with the token classes in regs, printing the result.
tokenize(test_prog, regs)