|      1 import scala.language.implicitConversions |         | 
|      2 import scala.language.reflectiveCalls |         | 
|      3 import scala.util._ |         | 
|      4 import scala.annotation.tailrec |         | 
|      5  |         | 
|      6 sealed abstract class Rexp |         | 
|      7  |         | 
|      8 case object NULL extends Rexp |         | 
|      9 case object EMPTY extends Rexp |         | 
|     10 case class CHAR(c: Char) extends Rexp |         | 
|     11 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |         | 
|     12 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |         | 
|     13 case class STAR(r: Rexp) extends Rexp |         | 
|     14  |         | 
|     15 def charlist2rexp(s : List[Char]) : Rexp = s match { |         | 
|     16   case Nil => EMPTY |         | 
|     17   case c::Nil => CHAR(c) |         | 
|     18   case c::s => SEQ(CHAR(c), charlist2rexp(s)) |         | 
|     19 } |         | 
|     20 implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) |         | 
|     21  |         | 
|     22  |         | 
|     23 implicit def RexpOps(r: Rexp) = new { |         | 
|     24   def | (s: Rexp) = ALT(r, s) |         | 
|     25   def % = STAR(r) |         | 
|     26   def ~ (s: Rexp) = SEQ(r, s) |         | 
|     27 } |         | 
|     28  |         | 
|     29 implicit def stringOps(s: String) = new { |         | 
|     30   def | (r: Rexp) = ALT(s, r) |         | 
|     31   def | (r: String) = ALT(s, r) |         | 
|     32   def % = STAR(s) |         | 
|     33   def ~ (r: Rexp) = SEQ(s, r) |         | 
|     34   def ~ (r: String) = SEQ(s, r) |         | 
|     35 } |         | 
|     36  |         | 
|     37 def Range(s : List[Char]) : Rexp = s match { |         | 
|     38   case Nil => NULL |         | 
|     39   case c::Nil => CHAR(c) |         | 
|     40   case c::s => ALT(CHAR(c), Range(s)) |         | 
|     41 } |         | 
|     42 def RANGE(s: String) = Range(s.toList) |         | 
|     43  |         | 
|     44 def PLUS(r: Rexp) = SEQ(r, STAR(r)) |         | 
|     45  |         | 
|     46 val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") |         | 
|     47 val DIGIT = RANGE("0123456789") |         | 
|     48 val ID = SYM ~ (SYM | DIGIT).%  |         | 
|     49 val NUM = PLUS(DIGIT) |         | 
|     50 val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"  |         | 
|     51 val SEMI: Rexp = ";" |         | 
|     52 val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" |         | 
|     53 val WHITESPACE = PLUS(RANGE(" \n")) |         | 
|     54 val RPAREN: Rexp = ")" |         | 
|     55 val LPAREN: Rexp = "(" |         | 
|     56 val BEGIN: Rexp = "{" |         | 
|     57 val END: Rexp = "}" |         | 
|     58  |         | 
|     59 //regular expressions ranked by position in the list |         | 
|     60 val regs: List[Rexp] =  |         | 
|     61   List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE) |         | 
|     62  |         | 
|     63 def nullable (r: Rexp) : Boolean = r match { |         | 
|     64   case NULL => false |         | 
|     65   case EMPTY => true |         | 
|     66   case CHAR(_) => false |         | 
|     67   case ALT(r1, r2) => nullable(r1) || nullable(r2) |         | 
|     68   case SEQ(r1, r2) => nullable(r1) && nullable(r2) |         | 
|     69   case STAR(_) => true |         | 
|     70 } |         | 
|     71  |         | 
|     72 def zeroable (r: Rexp) : Boolean = r match { |         | 
|     73   case NULL => true |         | 
|     74   case EMPTY => false |         | 
|     75   case CHAR(_) => false |         | 
|     76   case ALT(r1, r2) => zeroable(r1) && zeroable(r2) |         | 
|     77   case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) |         | 
|     78   case STAR(_) => false |         | 
|     79 } |         | 
|     80  |         | 
|     81 def der (c: Char, r: Rexp) : Rexp = r match { |         | 
|     82   case NULL => NULL |         | 
|     83   case EMPTY => NULL   |         | 
|     84   case CHAR(d) => if (c == d) EMPTY else NULL |         | 
|     85   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |         | 
|     86   case SEQ(r1, r2) =>  |         | 
|     87     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) |         | 
|     88     else SEQ(der(c, r1), r2) |         | 
|     89   case STAR(r) => SEQ(der(c, r), STAR(r)) |         | 
|     90 } |         | 
|     91  |         | 
|     92  |         | 
|     93 // calculates derivatives until all of them are zeroable |         | 
|     94 @tailrec |         | 
|     95 def munch(s: List[Char],  |         | 
|     96           pos: Int,  |         | 
|     97           rs: List[Rexp],  |         | 
|     98           last: Option[Int]): Option[Int] = rs match { |         | 
|     99   case Nil => last |         | 
|    100   case rs if (s.length <= pos) => last |         | 
|    101   case rs => { |         | 
|    102     val ders = rs.map(der(s(pos), _)) |         | 
|    103     val rs_nzero = ders.filterNot(zeroable(_)) |         | 
|    104     val rs_nulls = ders.filter(nullable(_)) |         | 
|    105     val new_last = if (rs_nulls != Nil) Some(pos) else last |         | 
|    106     munch(s, 1 + pos, rs_nzero, new_last) |         | 
|    107   } |         | 
|    108 } |         | 
|    109  |         | 
|    110 // iterates the munching function and prints  |         | 
|    111 // out the component strings |         | 
|    112 @tailrec |         | 
|    113 def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match { |         | 
|    114   case None if (s == "") => println("EOF") |         | 
|    115   case None => println(s"Lexing error: $s") |         | 
|    116   case Some(n) => { |         | 
|    117     val (head, tail) = s.splitAt(n + 1) |         | 
|    118     print(s"|${head.replaceAll("\n","RET")}|") |         | 
|    119     tokenize(tail, rs) |         | 
|    120   } |         | 
|    121 } |         | 
|    122  |         | 
|    123  |         | 
|    124 val test_prog = """ |         | 
|    125 start := XXX; |         | 
|    126 x := start; |         | 
|    127 y := start; |         | 
|    128 z := start; |         | 
|    129 while 0 < x do { |         | 
|    130  while 0 < y do { |         | 
|    131   while 0 < z do { |         | 
|    132     z := z - 1 |         | 
|    133   }; |         | 
|    134   z := start; |         | 
|    135   y := y - 1 |         | 
|    136  };      |         | 
|    137  y := start; |         | 
|    138  x := x - 1 |         | 
|    139 }; |         | 
|    140 write x; |         | 
|    141 write y; |         | 
|    142 write z |         | 
|    143 """ |         | 
|    144  |         | 
|    145 tokenize(test_prog, regs) |         |