diff -r e66bd5c563eb -r b5b5583a3a08 progs/token-bak.scala --- a/progs/token-bak.scala Mon Jul 27 11:02:48 2020 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -import scala.language.implicitConversions -import scala.language.reflectiveCalls -import scala.util._ -import scala.annotation.tailrec - -sealed abstract class Rexp - -case object NULL extends Rexp -case object EMPTY extends Rexp -case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp - -def charlist2rexp(s : List[Char]) : Rexp = s match { - case Nil => EMPTY - case c::Nil => CHAR(c) - case c::s => SEQ(CHAR(c), charlist2rexp(s)) -} -implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) - - -implicit def RexpOps(r: Rexp) = new { - def | (s: Rexp) = ALT(r, s) - def % = STAR(r) - def ~ (s: Rexp) = SEQ(r, s) -} - -implicit def stringOps(s: String) = new { - def | (r: Rexp) = ALT(s, r) - def | (r: String) = ALT(s, r) - def % = STAR(s) - def ~ (r: Rexp) = SEQ(s, r) - def ~ (r: String) = SEQ(s, r) -} - -def Range(s : List[Char]) : Rexp = s match { - case Nil => NULL - case c::Nil => CHAR(c) - case c::s => ALT(CHAR(c), Range(s)) -} -def RANGE(s: String) = Range(s.toList) - -def PLUS(r: Rexp) = SEQ(r, STAR(r)) - -val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz_") -val DIGIT = RANGE("0123456789") -val ID = SYM ~ (SYM | DIGIT).% -val NUM = PLUS(DIGIT) -val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" -val SEMI: Rexp = ";" -val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">" -val WHITESPACE = PLUS(RANGE(" \n")) -val RPAREN: Rexp = ")" -val LPAREN: Rexp = "(" -val BEGIN: Rexp = "{" -val END: Rexp = "}" - -//regular expressions ranked by position in the list -val regs: List[Rexp] = - List(KEYWORD, ID, OP, NUM, SEMI, LPAREN, RPAREN, BEGIN, END, WHITESPACE) - -def nullable (r: Rexp) : Boolean = r match { - case NULL => false - case EMPTY => true - case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) - case SEQ(r1, r2) => nullable(r1) && nullable(r2) - case STAR(_) => true -} - -def zeroable (r: Rexp) : Boolean = r match { - case NULL => true - case EMPTY => false - case CHAR(_) => false - case ALT(r1, r2) => zeroable(r1) && zeroable(r2) - case SEQ(r1, r2) => zeroable(r1) || zeroable(r2) - case STAR(_) => false -} - -def der (c: Char, r: Rexp) : Rexp = r match { - case NULL => NULL - case EMPTY => NULL - case CHAR(d) => if (c == d) EMPTY else NULL - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) - case SEQ(r1, r2) => - if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) - else SEQ(der(c, r1), r2) - case STAR(r) => SEQ(der(c, r), STAR(r)) -} - - -// calculates derivatives until all of them are zeroable -@tailrec -def munch(s: List[Char], - pos: Int, - rs: List[Rexp], - last: Option[Int]): Option[Int] = rs match { - case Nil => last - case rs if (s.length <= pos) => last - case rs => { - val ders = rs.map(der(s(pos), _)) - val rs_nzero = ders.filterNot(zeroable(_)) - val rs_nulls = ders.filter(nullable(_)) - val new_last = if (rs_nulls != Nil) Some(pos) else last - munch(s, 1 + pos, rs_nzero, new_last) - } -} - -// iterates the munching function and prints -// out the component strings -@tailrec -def tokenize(s: String, rs: List[Rexp]) : Unit = munch(s.toList, 0, rs, None) match { - case None if (s == "") => println("EOF") - case None => println(s"Lexing error: $s") - case Some(n) => { - val (head, tail) = s.splitAt(n + 1) - print(s"|${head.replaceAll("\n","RET")}|") - tokenize(tail, rs) - } -} - - -val test_prog = """ -start := XXX; -x := start; -y := start; -z := start; -while 0 < x do { - while 0 < y do { - while 0 < z do { - z := z - 1 - }; - z := start; - y := y - 1 - }; - y := start; - x := x - 1 -}; -write x; -write y; -write z -""" - -tokenize(test_prog, regs)