progs/token2.scala
author Christian Urban <urbanc@in.tum.de>
Thu, 21 Nov 2019 00:49:21 +0000
changeset 696 3a5a7908517f
parent 392 2d0a59127694
permissions -rw-r--r--
added krakatau version and made the mandel program work

import scala.language.implicitConversions    
import scala.language.reflectiveCalls
import scala.annotation.tailrec   

abstract class Rexp 
case object NULL extends Rexp
case object EMPTY extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 
case class RECD(x: String, r: Rexp) extends Rexp
case class CRANGE(cs: String) extends Rexp
case class PLUS(r: Rexp) extends Rexp
case class OPT(r: Rexp) extends Rexp
case class NTIMES(r: Rexp, n: Int) extends Rexp

abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Seq(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
   
// some convenience for typing in regular expressions
def charlist2rexp(s : List[Char]): Rexp = s match {
  case Nil => EMPTY
  case c::Nil => CHAR(c)
  case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)

implicit def RexpOps(r: Rexp) = new {
  def | (s: Rexp) = ALT(r, s)
  def % = STAR(r)
  def ~ (s: Rexp) = SEQ(r, s)
}

implicit def stringOps(s: String) = new {
  def | (r: Rexp) = ALT(s, r)
  def | (r: String) = ALT(s, r)
  def % = STAR(s)
  def ~ (r: Rexp) = SEQ(s, r)
  def ~ (r: String) = SEQ(s, r)
  def $ (r: Rexp) = RECD(s, r)
}

// nullable function: tests whether the regular 
// expression can recognise the empty string
def nullable (r: Rexp) : Boolean = r match {
  case NULL => false
  case EMPTY => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case STAR(_) => true
  case RECD(_, r) => nullable(r)
  case CRANGE(_) => false
  case PLUS(r) => nullable(r)
  case OPT(_) => true
  case NTIMES(r, n) => if (n == 0) true else nullable(r)
}

// derivative of a regular expression w.r.t. a character
def der (c: Char, r: Rexp) : Rexp = r match {
  case NULL => NULL
  case EMPTY => NULL
  case CHAR(d) => if (c == d) EMPTY else NULL
  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
  case SEQ(r1, r2) => 
    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
    else SEQ(der(c, r1), r2)
  case STAR(r) => SEQ(der(c, r), STAR(r))
  case RECD(_, r1) => der(c, r1)
  case CRANGE(cs) => if (cs.contains(c)) EMPTY else NULL
  case PLUS(r) => SEQ(der(c, r), STAR(r))
  case OPT(r) => ALT(der(c, r), NULL)
  case NTIMES(r, n) => if (n == 0) NULL else der(c, SEQ(r, NTIMES(r, n - 1)))
}

// derivative w.r.t. a string (iterates der)
def ders (s: List[Char], r: Rexp) : Rexp = s match {
  case Nil => r
  case c::s => ders(s, der(c, r))
}

// extracts a string from value
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(c) => c.toString
  case Left(v) => flatten(v)
  case Right(v) => flatten(v)
  case Seq(v1, v2) => flatten(v1) + flatten(v2)
  case Stars(vs) => vs.map(flatten).mkString
  case Rec(_, v) => flatten(v)
}

// extracts an environment from a value
def env(v: Val) : List[(String, String)] = v match {
  case Empty => Nil
  case Chr(c) => Nil
  case Left(v) => env(v)
  case Right(v) => env(v)
  case Seq(v1, v2) => env(v1) ::: env(v2)
  case Stars(vs) => vs.flatMap(env)
  case Rec(x, v) => (x, flatten(v))::env(v)
}

// injection part
def mkeps(r: Rexp) : Val = r match {
  case EMPTY => Empty
  case ALT(r1, r2) => 
    if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
  case SEQ(r1, r2) => Seq(mkeps(r1), mkeps(r2))
  case STAR(r) => Stars(Nil)
  case RECD(x, r) => Rec(x, mkeps(r))
  case PLUS(r) => Stars(List(mkeps(r)))
  case OPT(_) => Right(Empty)
  case NTIMES(r, n) => if (n == 0) Stars(Nil) else Stars(Nil.padTo(n, mkeps(r)))
  case _ => { println ("r : " + r.toString); 
              throw new Exception("mkeps error")}
}


def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (STAR(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
  case (SEQ(r1, r2), Seq(v1, v2)) => Seq(inj(r1, c, v1), v2)
  case (SEQ(r1, r2), Left(Seq(v1, v2))) => Seq(inj(r1, c, v1), v2)
  case (SEQ(r1, r2), Right(v2)) => Seq(mkeps(r1), inj(r2, c, v2))
  case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
  case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
  case (CHAR(_), Empty) => Chr(c) 
  case (CRANGE(_), Empty) => Chr(c) 
  case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
  case (PLUS(r), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
  case (OPT(r), Left(v1)) => Left(inj(r, c, v1))
  case (NTIMES(r, n), Seq(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
  case (NTIMES(r, n), Left(Seq(v1, Stars(vs)))) => Stars(inj(r, c, v1)::vs)
  case (NTIMES(r, n), Right(Stars(v::vs))) => Stars(mkeps(r)::inj(r, c, v)::vs)
  case _ => { println ("r : " + r.toString + "  v: " + v.toString); 
              throw new Exception("inj error")}
}

// main lexing function (produces a value)
def lex(r: Rexp, s: List[Char]) : Val = s match {
  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  case c::cs => inj(r, c, lex(der(c, r), cs))
}

def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)

lexing(("ab" | "ab") ~ ("b" | EMPTY), "ab")

lexing(OPT("ab"), "ab")

lexing(NTIMES("1", 3), "111")
lexing(NTIMES("1" | EMPTY, 3), "11")

// some "rectification" functions for simplification
def F_ID(v: Val): Val = v
def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Right(v) => Right(f2(v))
  case Left(v) => Left(f1(v))
}
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Seq(v1, v2) => Seq(f1(v1), f2(v2))
}
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
  (v:Val) => Seq(f1(Empty), f2(v))
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
  (v:Val) => Seq(f1(v), f2(Empty))
def F_RECD(f: Val => Val) = (v:Val) => v match {
  case Rec(x, v) => Rec(x, f(v))
}
def F_ERROR(v: Val): Val = throw new Exception("error")

// simplification of regular expressions returning also an
// rectification function; no simplification under STAR 
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALT(r1, r2) => {
    val (r1s, f1s) = simp(r1)
    val (r2s, f2s) = simp(r2)
    (r1s, r2s) match {
      case (NULL, _) => (r2s, F_RIGHT(f2s))
      case (_, NULL) => (r1s, F_LEFT(f1s))
      case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
                else (ALT (r1s, r2s), F_ALT(f1s, f2s)) 
    }
  }
  case SEQ(r1, r2) => {
    val (r1s, f1s) = simp(r1)
    val (r2s, f2s) = simp(r2)
    (r1s, r2s) match {
      case (NULL, _) => (NULL, F_ERROR)
      case (_, NULL) => (NULL, F_ERROR)
      case (EMPTY, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
      case (_, EMPTY) => (r1s, F_SEQ_Empty2(f1s, f2s))
      case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
    }
  }
  case RECD(x, r1) => {
    val (r1s, f1s) = simp(r1)
    (RECD(x, r1s), F_RECD(f1s))
  }
  case r => (r, F_ID)
}

def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
  case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
  case c::cs => {
    val (r_simp, f_simp) = simp(der(c, r))
    inj(r, c, f_simp(lex_simp(r_simp, cs)))
  }
}

def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)

lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")

lexing_simp(OPT("ab"), "ab")

// Lexing Rules for a Small While Language

val SYM = CRANGE("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
val DIGIT = CRANGE("0123456789")
val ID = SYM ~ (SYM | DIGIT).% 
val NUM = PLUS(DIGIT)
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
val SEMI: Rexp = ";"
val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
val WHITESPACE = PLUS(" " | "\n" | "\t")
val RPAREN: Rexp = ")"
val LPAREN: Rexp = "("
val BEGIN: Rexp = "{"
val END: Rexp = "}"
val STRING: Rexp = "\"" ~ SYM.% ~ "\""


val WHILE_REGS = (("k" $ KEYWORD) | 
                  ("i" $ ID) | 
                  ("o" $ OP) | 
                  ("n" $ NUM) | 
                  ("s" $ SEMI) | 
                  ("str" $ STRING) |
                  ("p" $ (LPAREN | RPAREN)) | 
                  ("b" $ (BEGIN | END)) | 
                  ("w" $ WHITESPACE)).%

// filters out all white spaces
def tokenise(r: Rexp, s: String) = 
  env(lexing_simp(r, s)).filterNot { (s) => s._1 == "w"}

//   Testing
//============

def time[T](code: => T) = {
  val start = System.nanoTime()
  val result = code
  val end = System.nanoTime()
  println((end - start)/1.0e9)
  result
}

println(lexing_simp(OPT("1"), "1"))
println(lexing_simp(OPT("1"), ""))
println(ders("111".toList, NTIMES("1",3)))
println(lexing_simp(NTIMES("1",3), "111"))

val r1 = ("a" | "ab") ~ ("bcd" | "c")
println(lexing(r1, "abcd"))

val r2 = ("" | "a") ~ ("ab" | "b")
println(lexing(r2, "ab"))


// Two Simple While Tests
//========================
println("prog0 test")

val prog0 = """read n"""
println(env(lexing_simp(WHILE_REGS, prog0)))

println("prog1 test")

val prog1 = """read  n; write (n)"""
println(env(lexing_simp(WHILE_REGS, prog1)))


// Big Test
//==========

def escape(raw: String): String = {
  import scala.reflect.runtime.universe._
  Literal(Constant(raw)).toString
}

val prog2 = """
write "Fib";
read n;
minus1 := 0;
minus2 := 1;
while n > 0 do {
  temp := minus2;
  minus2 := minus1 + minus2;
  minus1 := temp;
  n := n - 1
};
write "Result";
write minus2
"""

val prog3 = """
start := 1000;
x := start;
y := start;
z := start;
while 0 < x do {
 while 0 < y do {
  while 0 < z do {
    z := z - 1
  };
  z := start;
  y := y - 1
 };     
 y := start;
 x := x - 1
}
"""


println("Tokens")
println(tokenise(WHILE_REGS, prog2).mkString("\n"))

val fib_tokens = tokenise(WHILE_REGS, prog2)
fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")


val test_tokens = tokenise(WHILE_REGS, prog3)
test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")


/*
for (i <- 1 to 120 by 10) {
  print(i.toString + ":  ")
  time(lexing_simp(WHILE_REGS, prog2 * i))
}
*/

val toks_fib = 
  List(("k","write"),
       ("str","Fib"),
       ("s",";"),
       ("k","read"),
       ("i","n"),
       ("s",";"),
       ("i","minus1"),
       ("o",":="),
       ("n","0"),
       ("s",";"),
       ("i","minus2"),
       ("o",":="),
       ("n","1"),
       ("s",";"),
       ("k","while"),
       ("i","n"),
       ("o",">"),
       ("n","0"),
       ("k","do"),
       ("b","{"),
       ("i","temp"),
       ("o",":="),
       ("i","minus2"),
       ("s",";"),
       ("i","minus2"),
       ("o",":="),
       ("i","minus1"),
       ("o","+"),
       ("i","minus2"),
       ("s",";"),
       ("i","minus1"),
       ("o",":="),
       ("i","temp"),
       ("s",";"),
       ("i","n"),
       ("o",":="),
       ("i","n"),
       ("o","-"),
       ("n","1"),
       ("b","}"),
       ("s",";"),
       ("k","write"),
       ("str","Result"),
       ("s",";"),
       ("k","write"),
       ("i","minus2"))

val toks_test =
  List(("i","start"),
       ("o",":="),
       ("n","1000"),
       ("s",";"),
       ("i","x"),
       ("o",":="),
       ("i","start"),
       ("s",";"),
       ("i","y"),
       ("o",":="),
       ("i","start"),
       ("s",";"),
       ("i","z"),
       ("o",":="),
       ("i","start"),
       ("s",";"),
       ("k","while"),
       ("n","0"),
       ("o","<"),
       ("i","x"),
       ("k","do"),
       ("b","{"),
       ("k","while"),
       ("n","0"),
       ("o","<"),
       ("i","y"),
       ("k","do"),
       ("b","{"),
       ("k","while"),
       ("n","0"),
       ("o","<"),
       ("i","z"),
       ("k","do"),
       ("b","{"),
       ("i","z"),
       ("o",":="),
       ("i","z"),
       ("o","-"),
       ("n","1"),
       ("b","}"),
       ("s",";"),
       ("i","z"),
       ("o",":="),
       ("i","start"),
       ("s",";"),
       ("i","y"),
       ("o",":="),
       ("i","y"),
       ("o","-"),
       ("n","1"),
       ("b","}"),
       ("s",";"),
       ("i","y"),
       ("o",":="),
       ("i","start"),
       ("s",";"),
       ("i","x"),
       ("o",":="),
       ("i","x"),
       ("o","-"),
       ("n","1"),
       ("b","}"))