thys2/blexer1.sc
author Chengsong
Fri, 04 Feb 2022 00:05:12 +0000
changeset 409 f71df68776bb
parent 403 6291181fad07
child 412 48876e1092f1
permissions -rw-r--r--
5ct

// A simple lexer inspired by work of Sulzmann & Lu
//==================================================
//
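// Call the test cases with 
//
//   amm blexer1.sc small
//
// The following entry points exist only as commented-out code at the
// end of this file and must be re-enabled before they can be called:
//
//   amm blexer1.sc fib
//   amm blexer1.sc loops
//   amm blexer1.sc email
//
//   amm blexer1.sc all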


// Regular expressions, including records.
// The comments on the constructors summarise how nullable/der in this
// file treat them.
abstract class Rexp 
case object ZERO extends Rexp                    // not nullable, derivative is ZERO
case object ONE extends Rexp                     // nullable, derivative is ZERO
case object ANYCHAR extends Rexp                 // derivative w.r.t. any character is ONE
case class CHAR(c: Char) extends Rexp            // derivative is ONE only w.r.t. c
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 
case class RECD(x: String, r: Rexp) extends Rexp  // record named x: for extracting strings or tokens
case class NTIMES(n: Int, r: Rexp) extends Rexp  // nullable if n == 0 or r is nullable
case class OPTIONAL(r: Rexp) extends Rexp        // always nullable
case class NOT(r: Rexp) extends Rexp             // nullable exactly when r is not
  
// Values: describe how a regular expression matched a string.
// mkeps/inj below build them; flatten/env read them back.
abstract class Val
case object Empty extends Val                        // value for ONE
case class Chr(c: Char) extends Val                  // a single matched character
case class Sequ(v1: Val, v2: Val) extends Val        // value for SEQ
case class Left(v: Val) extends Val                  // first branch of ALTS was taken
case class Right(v: Val) extends Val                 // second branch of ALTS was taken
case class Stars(vs: List[Val]) extends Val          // one value per STAR iteration
case class Rec(x: String, v: Val) extends Val        // value for the record RECD(x, _)
case class Ntime(vs: List[Val]) extends Val          // values for NTIMES
case class Optionall(v: Val) extends Val             // value for OPTIONAL
case class Nots(s: String) extends Val               // value for NOT, carrying a string



// Bits used by the bit-coded lexer. As used by internalise/decode_aux in
// this file: Z marks the first branch of an ALTS or the end of a STAR,
// S marks the second branch of an ALTS or one more STAR iteration.
abstract class Bit
case object Z extends Bit
case object S extends Bit


// A bit sequence describing a lexing value.
type Bits = List[Bit]

// Annotated regular expressions: the counterparts of Rexp produced by
// internalise, each (except AZERO) carrying a bit sequence bs.
// AALTS holds a list of alternatives rather than exactly two.
abstract class ARexp 
case object AZERO extends ARexp
case class AONE(bs: Bits) extends ARexp
case class ACHAR(bs: Bits, c: Char) extends ARexp
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
case class ANOT(bs: Bits, r: ARexp) extends ARexp
case class AANYCHAR(bs: Bits) extends ARexp


   
// some convenience for typing in regular expressions

// Builds the regular expression that matches exactly the given character
// sequence: ONE for the empty list, a lone CHAR for a single character,
// and a right-nested SEQ of CHARs for anything longer.
def charlist2rexp(s : List[Char]): Rexp = {
  val letters: List[Rexp] = s.map(ch => CHAR(ch))
  letters
    .reduceRightOption[Rexp]((letter, rest) => SEQ(letter, rest))
    .getOrElse(ONE)
}
// Implicit conversion that lets a string be used wherever a Rexp is expected.
implicit def string2rexp(s : String) : Rexp = {
  val chars = s.toList
  charlist2rexp(chars)
}

// Operators for writing regular expressions:
//   r | s   alternative
//   r.%     Kleene star
//   r ~ s   sequence
//
// This is an implicit class rather than `implicit def ... = new { ... }`.
// The anonymous-class form gives the operators a structural type, so every
// use is dispatched through reflection and depends on the reflectiveCalls
// language feature. An implicit class provides the same conversion under
// the same name with ordinary, statically dispatched methods.
implicit class RexpOps(r: Rexp) {
  def | (s: Rexp): ALTS = ALTS(r, s)
  def % : STAR = STAR(r)
  def ~ (s: Rexp): SEQ = SEQ(r, s)
}

// The same operators as on Rexp, but starting from a string, which is
// turned into a regular expression by the implicit string2rexp conversion:
//   s | r   alternative (r a Rexp or a string)
//   s.%     Kleene star
//   s ~ r   sequence    (r a Rexp or a string)
//   s $ r   record: r labelled with the name s
//
// This is an implicit class rather than `implicit def ... = new { ... }`.
// The anonymous-class form gives the operators a structural type, so every
// use is dispatched through reflection and depends on the reflectiveCalls
// language feature.
implicit class stringOps(s: String) {
  def | (r: Rexp): ALTS = ALTS(s, r)
  def | (r: String): ALTS = ALTS(s, r)
  def % : STAR = STAR(s)
  def ~ (r: Rexp): SEQ = SEQ(s, r)
  def ~ (r: String): SEQ = SEQ(s, r)
  def $ (r: Rexp): RECD = RECD(s, r)
}

// Tests whether the regular expression can match the empty string.
def nullable(r: Rexp) : Boolean = r match {
  case ONE | STAR(_) | OPTIONAL(_) => true
  case ZERO | CHAR(_) | ANYCHAR => false
  case ALTS(left, right) => nullable(left) || nullable(right)
  case SEQ(first, second) => nullable(first) && nullable(second)
  case RECD(_, body) => nullable(body)
  // zero repetitions match the empty string regardless of the body
  case NTIMES(count, body) => count == 0 || nullable(body)
  case NOT(body) => !nullable(body)
}

// Derivative of the regular expression r with respect to the character c.
def der(c: Char, r: Rexp) : Rexp = r match {
  case ZERO | ONE => ZERO
  case ANYCHAR => ONE
  case CHAR(d) => if (d == c) ONE else ZERO
  case ALTS(left, right) => ALTS(der(c, left), der(c, right))
  case SEQ(first, second) =>
    val consumeFirst = SEQ(der(c, first), second)
    // when the first part can be empty, c may also be consumed by the second part
    if (nullable(first)) ALTS(consumeFirst, der(c, second)) else consumeFirst
  case STAR(body) => SEQ(der(c, body), STAR(body))
  case RECD(_, body) => der(c, body)
  case OPTIONAL(body) => der(c, body)
  case NTIMES(count, body) =>
    if (count <= 0) ZERO else SEQ(der(c, body), NTIMES(count - 1, body))
  case NOT(body) => NOT(der(c, body))
}


// Extracts the underlying string from a value.
// Every constructor of Val is handled. Nots(s) contributes the string s it
// carries; without that case, flattening any value that contains a Nots
// (for example via env on a record wrapped around a NOT) would fail with a
// MatchError.
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(c) => c.toString
  case Left(v) => flatten(v)
  case Right(v) => flatten(v)
  case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
  case Stars(vs) => vs.map(flatten).mkString
  case Ntime(vs) => vs.map(flatten).mkString
  case Optionall(v) => flatten(v)
  case Rec(_, v) => flatten(v)
  case Nots(s) => s
}


// Collects the (record name, matched string) pairs contained in a value,
// in left-to-right order; used for tokenising a string.
def env(v: Val) : List[(String, String)] = v match {
  case Empty | Chr(_) => Nil
  case Rec(name, inner) => (name, flatten(inner)) :: env(inner)
  case Nots(text) => List(("Negative", text))
  case Sequ(first, second) => env(first) ::: env(second)
  case Left(inner) => env(inner)
  case Right(inner) => env(inner)
  case Optionall(inner) => env(inner)
  case Stars(items) => items.flatMap(env)
  case Ntime(items) => items.flatMap(env)
}


// The injection and mkeps part of the lexer
//===========================================

// Builds the value describing how r matches the empty string.
// Only defined for the cases listed; for NOT an exception is thrown when
// the inner expression is nullable.
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case STAR(_) => Stars(Nil)
  case OPTIONAL(_) => Optionall(Empty)
  case RECD(name, body) => Rec(name, mkeps(body))
  case SEQ(first, second) => Sequ(mkeps(first), mkeps(second))
  case ALTS(left, right) =>
    // prefer the left alternative whenever it can match the empty string
    if (nullable(left)) Left(mkeps(left)) else Right(mkeps(right))
  case NTIMES(count, body) => Ntime(List.fill(count)(mkeps(body)))
  case NOT(inner) =>
    if (!nullable(inner)) Nots("") //Nots(s.reverse.toString)
    else throw new Exception("error")
//   case NOT(ZERO) => Empty
//   case NOT(CHAR(c)) => Empty
//   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
//   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
//   case NOT(STAR(r)) => Stars(Nil) 

}

// Injection: given a value v for the derivative of r with respect to c,
// rebuilds the corresponding value for r itself by putting c back in.
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (CHAR(_), Empty) => Chr(c)
  case (ANYCHAR, Empty) => Chr(c)
  case (ALTS(left, _), Left(inner)) => Left(inj(left, c, inner))
  case (ALTS(_, right), Right(inner)) => Right(inj(right, c, inner))
  case (SEQ(first, _), Sequ(headVal, tailVal)) => Sequ(inj(first, c, headVal), tailVal)
  case (SEQ(first, _), Left(Sequ(headVal, tailVal))) => Sequ(inj(first, c, headVal), tailVal)
  case (SEQ(first, second), Right(inner)) => Sequ(mkeps(first), inj(second, c, inner))
  case (STAR(body), Sequ(headVal, Stars(iterations))) => Stars(inj(body, c, headVal) :: iterations)
  case (NTIMES(_, body), Sequ(headVal, Ntime(iterations))) => Ntime(inj(body, c, headVal) :: iterations)
  case (RECD(name, body), _) => Rec(name, inj(body, c, v))
  case (OPTIONAL(body), inner) => Optionall(inj(body, c, inner))
  case (NOT(_), Nots(text)) => Nots(c.toString ++ text)
}

// some "rectification" functions for simplification
// rectification function that leaves the value unchanged
def F_ID(v: Val): Val = identity(v)
// rectifies with f and wraps the result in Right
def F_RIGHT(f: Val => Val) = f.andThen(rectified => Right(rectified))
// rectifies with f and wraps the result in Left
def F_LEFT(f: Val => Val) = f.andThen(rectified => Left(rectified))
// rectifies a Left value with f1 and a Right value with f2
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Left(inner) => Left(f1(inner))
  case Right(inner) => Right(f2(inner))
}
// rectifies the two components of a Sequ value with f1 and f2 respectively
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Sequ(first, second) =>
    val fixedFirst = f1(first)
    val fixedSecond = f2(second)
    Sequ(fixedFirst, fixedSecond)
}
// rebuilds a Sequ whose first component was simplified away to ONE
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = (v:Val) => {
  val fixedFirst = f1(Empty)
  val fixedSecond = f2(v)
  Sequ(fixedFirst, fixedSecond)
}
// rebuilds a Sequ whose second component was simplified away to ONE
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = (v:Val) => {
  val fixedFirst = f1(v)
  val fixedSecond = f2(Empty)
  Sequ(fixedFirst, fixedSecond)
}

// rectification function for expressions simplified to ZERO: always throws
def F_ERROR(v: Val): Val = {
  val failure = new Exception("error")
  throw failure
}

// Simplification: returns the simplified regular expression together with
// a rectification function that turns a value for the simplified
// expression back into a value for the original one.
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALTS(r1, r2) =>
    val (left, fixLeft) = simp(r1)
    val (right, fixRight) = simp(r2)
    if (left == ZERO) (right, F_RIGHT(fixRight))
    else if (right == ZERO) (left, F_LEFT(fixLeft))
    else if (left == right) (left, F_LEFT(fixLeft))   // duplicate alternative: keep the left one
    else (ALTS(left, right), F_ALT(fixLeft, fixRight))
  case SEQ(r1, r2) =>
    val (first, fixFirst) = simp(r1)
    val (second, fixSecond) = simp(r2)
    if (first == ZERO || second == ZERO) (ZERO, F_ERROR)
    else if (first == ONE) (second, F_SEQ_Empty1(fixFirst, fixSecond))
    else if (second == ONE) (first, F_SEQ_Empty2(fixFirst, fixSecond))
    else (SEQ(first, second), F_SEQ(fixFirst, fixSecond))
  case unchanged => (unchanged, F_ID)
}

// Lexing with simplification: takes the derivative character by character,
// simplifies it, and on the way back rectifies and injects the characters
// into the value. Throws when the input is exhausted and the remaining
// expression is not nullable.
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
  case head :: rest =>
    val (simplified, rectify) = simp(der(head, r))
    val restValue = lex_simp(simplified, rest)
    inj(r, head, rectify(restValue))
  case Nil =>
    if (!nullable(r)) throw new Exception(s"lexing error $r not nullable")
    else mkeps(r)
}

// Lexes the string s with r and returns the (record name, string) pairs.
def lexing_simp(r: Rexp, s: String) = {
  val value = lex_simp(r, s.toList)
  env(value)
}

// The Lexing Rules for the WHILE Language

// one or more repetitions of r: r followed by r*
def PLUS(r: Rexp) = SEQ(r, STAR(r))

// Alternative over the given characters: ZERO for the empty list, a lone
// CHAR for a single character, otherwise a right-nested ALTS of CHARs.
def Range(s : List[Char]) : Rexp = {
  val choices: List[Rexp] = s.map(ch => CHAR(ch))
  choices
    .reduceRightOption[Rexp]((choice, rest) => ALTS(choice, rest))
    .getOrElse(ZERO)
}
// alternative over all characters of the string s
def RANGE(s: String) = {
  val chars = s.toList
  Range(chars)
}

// Token classes of the WHILE language.
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")  // letters and underscore
val DIGIT = RANGE("0123456789")
val ID = SYM ~ (SYM | DIGIT).% 
val NUM = PLUS(DIGIT)
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" 
val SEMI: Rexp = ";"
val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
// NOTE(review): RPAREN is the opening brace and LPAREN the closing brace, so
// the names look swapped (and these are braces, not parentheses). Within this
// file they are only used together as (LPAREN | RPAREN) — confirm before renaming.
val RPAREN: Rexp = "{"
val LPAREN: Rexp = "}"
// a double-quoted string whose content consists of SYM characters only
val STRING: Rexp = "\"" ~ SYM.% ~ "\""


// Derivative example: "ab" \ 'a'  -->  ONE ~ "b"
//
// The complete lexing rule: a star over the alternatives of all token
// classes, each wrapped in a record whose name tags the kind of token
// (k keyword, i identifier, o operator, n number, s semicolon,
//  str string, p brace, w whitespace).
val WHILE_REGS = (("k" $ KEYWORD) | 
                  ("i" $ ID) | 
                  ("o" $ OP) | 
                  ("n" $ NUM) | 
                  ("s" $ SEMI) | 
                  ("str" $ STRING) |
                  ("p" $ (LPAREN | RPAREN)) | 
                  ("w" $ WHITESPACE)).%

// Test expressions for the extended constructors.
// NREGS/NREGS1 use NTIMES and OPTIONAL, for which internalise in this file
// has no case, so they can only be used with the non-bit-coded lexer
// (lex_simp / lexing_simp).
val NREGS = NTIMES(5, OPTIONAL(SYM))
val NREGS1 = ("test" $ NREGS)
// "hehe" followed by anything that does not contain "haha"
//========================
val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))


  // bnullable function: tests whether the annotated regular
  // expression can recognise the empty string.
  // Every constructor of ARexp is handled. AANYCHAR consumes exactly one
  // character (its derivative is AONE), so it is never nullable; without
  // that case any annotated expression containing AANYCHAR in a tested
  // position would fail with a MatchError.
def bnullable (r: ARexp) : Boolean = r match {
    case AZERO => false
    case AONE(_) => true
    case ACHAR(_,_) => false
    case AANYCHAR(_) => false
    case AALTS(_, rs) => rs.exists(bnullable)
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
    case ASTAR(_, _) => true
    case ANOT(_, rn) => !bnullable(rn)
  }

// Bit sequence for the way r matches the empty string; only the cases
// listed are handled.
def mkepsBC(r: ARexp) : Bits = r match {
    case AONE(bits) => bits
    case ANOT(bits, _) => bits
    // Z marks the end of the star's iterations
    case ASTAR(bits, _) => bits ++ List(Z)
    case ASEQ(bits, first, second) => bits ++ mkepsBC(first) ++ mkepsBC(second)
    case AALTS(bits, alternatives) =>
      // take the first alternative that can match the empty string
      val firstNullable = alternatives.indexWhere(bnullable)
      bits ++ mkepsBC(alternatives(firstNullable))
  }


// Derivative of the annotated regular expression r with respect to c.
def bder(c: Char, r: ARexp) : ARexp = r match {
    case AZERO | AONE(_) => AZERO
    case AANYCHAR(bits) => AONE(bits)
    case ACHAR(bits, d) => if (d == c) AONE(bits) else AZERO
    case ANOT(bits, inner) => ANOT(bits, bder(c, inner))
    case AALTS(bits, alternatives) => AALTS(bits, alternatives.map(alt => bder(c, alt)))
    case ASTAR(bits, body) =>
      // S marks one more iteration of the star
      val firstIteration = fuse(List(S), bder(c, body))
      ASEQ(bits, firstIteration, ASTAR(Nil, body))
    case ASEQ(bits, first, second) =>
      if (!bnullable(first)) ASEQ(bits, bder(c, first), second)
      else {
        val consumeFirst = ASEQ(Nil, bder(c, first), second)
        // first matches the empty string: record its bits, continue in second
        val skipFirst = fuse(mkepsBC(first), bder(c, second))
        AALTS(bits, consumeFirst :: skipFirst :: Nil)
      }
  } 

// Prepends the bits bs to the bit annotation at the top of r.
// AZERO carries no bits and is returned unchanged. Every constructor of
// ARexp is handled; without the AANYCHAR case, fusing onto an internalised
// ANYCHAR (e.g. when it is a direct branch of an ALTS) would fail with a
// MatchError.
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(cs) => AONE(bs ++ cs)
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
    case AANYCHAR(cs) => AANYCHAR(bs ++ cs)
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
    case ANOT(cs, r) => ANOT(bs ++ cs, r)
  }


// Translates a regular expression into an annotated one with empty bit
// annotations, except that the two branches of an ALTS are tagged with Z
// and S. Records are dropped. There is no case for NTIMES or OPTIONAL.
def internalise(r: Rexp) : ARexp = r match {
    case ZERO => AZERO
    case ONE => AONE(Nil)
    case ANYCHAR => AANYCHAR(Nil)
    case CHAR(c) => ACHAR(Nil, c)
    //case PRED(f) => APRED(Nil, f)
    case RECD(_, body) => internalise(body)
    case NOT(body) => ANOT(Nil, internalise(body))
    case STAR(body) => ASTAR(Nil, internalise(body))
    case SEQ(first, second) => ASEQ(Nil, internalise(first), internalise(second))
    case ALTS(left, right) =>
      val taggedLeft = fuse(List(Z), internalise(left))
      val taggedRight = fuse(List(S), internalise(right))
      AALTS(Nil, List(taggedLeft, taggedRight))
    // case ALTS(r1::rs) => {
    //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
    //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
    // }
  }


// Simplification of annotated regular expressions. Sequences with an AZERO
// component collapse to AZERO and a leading AONE is fused into the second
// component; alternatives are simplified, flattened and de-duplicated up
// to erase. Everything else is returned unchanged.
def bsimp(r: ARexp): ARexp = r match {
  case ASEQ(bs1, r1, r2) =>
    val first = bsimp(r1)
    val second = bsimp(r2)
    (first, second) match {
      case (AZERO, _) | (_, AZERO) => AZERO
      case (AONE(bs2), _) => fuse(bs1 ++ bs2, second)
      case _ => ASEQ(bs1, first, second)
    }
  case AALTS(bs1, rs) =>
    val flattened = flats(rs.map(bsimp))
    distinctBy(flattened, erase) match { //strongDB(flat_res)//distinctBy(flat_res, erase)
      case Nil => AZERO
      case only :: Nil => fuse(bs1, only)
      case several => AALTS(bs1, several)
    }
  case other => other
}
  // Same simplification as bsimp, except that the alternatives are
  // de-duplicated with strongDB instead of distinctBy(_, erase).
  def strongBsimp(r: ARexp): ARexp = r match {
    case ASEQ(bs1, r1, r2) =>
      val first = strongBsimp(r1)
      val second = strongBsimp(r2)
      (first, second) match {
        case (AZERO, _) | (_, AZERO) => AZERO
        case (AONE(bs2), _) => fuse(bs1 ++ bs2, second)
        case _ => ASEQ(bs1, first, second)
      }
    case AALTS(bs1, rs) =>
      val flattened = flats(rs.map(strongBsimp))
      strongDB(flattened) match { //distinctBy(flat_res, erase)
        case Nil => AZERO
        case only :: Nil => fuse(bs1, only)
        case several => AALTS(bs1, several)
      }
    case other => other
  }

  // derivative of r with respect to all characters of s, left to right,
  // without simplification
  def bders (s: List[Char], r: ARexp) : ARexp =
    s.foldLeft(r)((current, c) => bder(c, current))

  // Flattens one level of a list of alternatives: AZERO entries are
  // dropped and the members of a nested AALTS are spliced in, each with
  // the bits of the enclosing AALTS fused on.
  def flats(rs: List[ARexp]): List[ARexp] = rs.flatMap {
      case AZERO => Nil
      case AALTS(bs, members) => members.map(member => fuse(bs, member))
      case other => other :: Nil
    }

  // Keeps the first element for every distinct key f(x), preserving order.
  // Keys listed in acc count as already seen, so elements mapping to them
  // are dropped as well.
  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = {
    val (_, keptReversed) = xs.foldLeft((acc, List.empty[B])) {
      case ((seen, kept), x) =>
        val key = f(x)
        if (seen.contains(key)) (seen, kept)
        else (key :: seen, x :: kept)
    }
    keptReversed.reverse
  } 

  // Renders a regular expression as a compact string.
  // The output for STAR, SEQ, CHAR and ANYCHAR is unchanged from before.
  // The remaining constructors are rendered too, so that the function is
  // total: decode_aux calls it on the body of every NOT, and previously an
  // ALTS, ONE, ZERO, nested NOT, record, NTIMES or OPTIONAL inside a NOT
  // made it fail with a MatchError.
  def prettyRexp(r: Rexp) : String = r match {
      case ZERO => "0"
      case ONE => "1"
      case STAR(r0) => s"${prettyRexp(r0)}*"
      case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
      case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
      case CHAR(c) => c.toString
      case ANYCHAR => "."
      case ALTS(r1, r2) => s"(${prettyRexp(r1)}|${prettyRexp(r2)})"
      case NOT(r0) => s"!(${prettyRexp(r0)})"
      case RECD(x, r0) => s"($x:${prettyRexp(r0)})"
      case NTIMES(n, r0) => s"(${prettyRexp(r0)}){$n}"
      case OPTIONAL(r0) => s"(${prettyRexp(r0)})?"
  }

  // Decodes a prefix of the bit sequence against r and returns the decoded
  // value together with the bits that were not consumed.
  // Z/S select the first/second branch of an ALTS; for a STAR, S announces
  // one more iteration and Z the end. Only the cases listed are handled.
  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
    case (ONE, remaining) => (Empty, remaining)
    case (CHAR(ch), remaining) => (Chr(ch), remaining)
    case (NOT(inner), remaining) => (Nots(prettyRexp(inner)), remaining)
    case (RECD(name, body), remaining) =>
      val (inner, unread) = decode_aux(body, remaining)
      (Rec(name, inner), unread)
    case (ALTS(left, _), Z :: remaining) =>
      val (inner, unread) = decode_aux(left, remaining)
      (Left(inner), unread)
    case (ALTS(_, right), S :: remaining) =>
      val (inner, unread) = decode_aux(right, remaining)
      (Right(inner), unread)
    case (SEQ(first, second), remaining) =>
      val (firstVal, afterFirst) = decode_aux(first, remaining)
      val (secondVal, afterSecond) = decode_aux(second, afterFirst)
      (Sequ(firstVal, secondVal), afterSecond)
    case (STAR(_), Z :: remaining) => (Stars(Nil), remaining)
    case (STAR(body), S :: remaining) =>
      val (iteration, afterIteration) = decode_aux(body, remaining)
      decode_aux(STAR(body), afterIteration) match {
        case (Stars(later), unread) => (Stars(iteration :: later), unread)
      }
  }

  // Decodes the complete bit sequence against r; throws if bits are left over.
  def decode(r: Rexp, bs: Bits) = {
    val (value, leftover) = decode_aux(r, bs)
    if (leftover.isEmpty) value
    else throw new Exception("Not decodable")
  }



// Bit-coded lexing of s with r; returns the resulting bit sequence.
def blexSimp(r: Rexp, s: String) : List[Bit] = {
    val annotated = internalise(r)
    blex_simp(annotated, s.toList)
}

// Bit-coded lexing of s with r, with the bit sequence decoded into a value.
def blexing_simp(r: Rexp, s: String) : Val =
    decode(r, blex_simp(internalise(r), s.toList))



  // derivative of r with respect to all characters of s, left to right,
  // simplifying with bsimp after every step
  def bders_simp(s: List[Char], r: ARexp) : ARexp =
    s.foldLeft(r)((current, c) => bsimp(bder(c, current)))
  
  // simplified derivative of the (internalised) expression r w.r.t. the string s
  def bdersSimp(s: String, r: Rexp) : ARexp = {
    val annotated = internalise(r)
    bders_simp(s.toList, annotated)
  }


  // Removes all bit annotations, turning an annotated regular expression
  // back into a plain one. A list of alternatives becomes ZERO when empty,
  // the sole member when there is one, and a right-nested ALTS otherwise.
  def erase(r:ARexp): Rexp = r match{
    case AZERO => ZERO
    case AONE(_) => ONE
    case AANYCHAR(_) => ANYCHAR
    case ACHAR(_, c) => CHAR(c)
    case ASEQ(_, first, second) => SEQ(erase(first), erase(second))
    case ASTAR(_, body) => STAR(erase(body))
    case ANOT(_, body) => NOT(erase(body))
    case AALTS(bs, alternatives) => alternatives match {
      case Nil => ZERO
      case only :: Nil => erase(only)
      case first :: rest => ALTS(erase(first), erase(AALTS(bs, rest)))
    }
  }

// the members of an AALTS, or the expression itself as a one-element list
def breakHead(r: ARexp) : List[ARexp] = r match {
    case AALTS(_, members) => members
    case single => List(single)
}

// Like distinctBy, but also returns the updated list of seen keys.
// Elements whose key f(x) is already in acc are dropped; the kept elements
// are returned after those passed in accB (reversed), and the seen keys
// are returned with the newest first.
def distinctByWithAcc[B, C](xs: List[B], f: B => C, 
    acc: List[C] = Nil, accB: List[B] = Nil): (List[B], List[C]) = {
    val (seenKeys, keptReversed) = xs.foldLeft((acc, accB)) {
      case ((seen, kept), x) =>
        val key = f(x)
        if (seen.contains(key)) (seen, kept)
        else (key :: seen, x :: kept)
    }
    (keptReversed.reverse, seenKeys)
  } 


  // Stronger de-duplication of a list of alternatives than distinctBy(_, erase).
  //
  //   xs   : the alternatives still to be processed
  //   acc1 : erased forms of the alternatives seen so far; an element whose
  //          erased form is already in acc1 is dropped
  //   acc2 : for every star body seen so far (second component, erased), the
  //          erased "heads" that have already appeared in front of that star
  //          (first component)
  //
  // Besides exact duplicates, it looks at elements of the shape r0* and
  // r1 ~ r0*: the heads (ONE for a bare star, the members of r1 if r1 is an
  // AALTS, r1 itself otherwise) are compared with the heads already recorded
  // for the same star body, and only the new heads are kept.
  def strongDB(xs: List[ARexp], 
                       acc1: List[Rexp] = Nil, 
                       acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
    case Nil => Nil
    case (x::xs) => 
        // exact duplicate (up to erase) of something already kept or seen
        if(acc1.contains(erase(x)))
            strongDB(xs, acc1, acc2)
        else{
            x match {
                // a bare star is treated like a sequence with the single head ONE
                case ASTAR(bs0, r0) => 
                    val headList : List[ARexp] = List[ARexp](AONE(Nil))
                    // position of the entry recorded for this star body, or -1
                    val i = acc2.indexWhere(
                        r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
                    )
                    if(i == -1){ 
                        // first time this star body is seen: keep x and record it
                        x::strongDB(
                            xs, erase(x)::acc1, (ONE::Nil, erase(r0))::acc2
                        )
                    }
                    else{
                        val headListAlready = acc2(i)
                        // newHeads: heads not recorded yet; oldHeadsUpdated: recorded heads plus the new ones
                        val (newHeads, oldHeadsUpdated) = 
                                distinctByWithAcc(headList, erase, headListAlready._1)
                        // headList has exactly one element, so newHeads has one element or none
                        newHeads match{
                            case newHead::Nil =>
                                ASTAR(bs0, r0) :: 
                                strongDB(xs, erase(x)::acc1, 
                                acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
                            case Nil =>
                                // ONE was already recorded as a head for this star: drop x
                                strongDB(xs, erase(x)::acc1, 
                                acc2)
                        }
                    }                
                case ASEQ(bs, r1, ASTAR(bs0, r0)) => 
                    // the members of r1 if it is an AALTS, otherwise r1 itself
                    val headList = breakHead(r1)
                    // position of the entry recorded for this star body, or -1
                    val i = acc2.indexWhere(
                        r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
                    )
                    if(i == -1){ 
                        // first time this star body is seen: keep x and record all its heads
                        x::strongDB(
                            xs, erase(x)::acc1, (headList.map(erase(_)), erase(r0))::acc2
                        )
                    }
                    else{
                        val headListAlready = acc2(i)
                        // newHeads: heads not recorded yet; oldHeadsUpdated: recorded heads plus the new ones
                        val (newHeads, oldHeadsUpdated) = 
                                distinctByWithAcc(headList, erase, headListAlready._1)
                        newHeads match{
                            // NOTE(review): if r1 was an AALTS, its own bits are not carried
                            // over to newHead here (bsimp uses fuse in the comparable
                            // single-alternative case) — confirm this is intended.
                            case newHead::Nil =>
                                ASEQ(bs, newHead, ASTAR(bs0, r0)) :: 
                                strongDB(xs, erase(x)::acc1, 
                                acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
                            case Nil =>
                                // every head was already recorded for this star: drop x
                                strongDB(xs, erase(x)::acc1, 
                                acc2)
                            // two or more new heads: breakHead only returns several elements
                            // for an AALTS, so r1 can be taken apart to reuse its bits
                            case hds => val AALTS(bsp, rsp) = r1
                                ASEQ(bs, AALTS(bsp, hds), ASTAR(bs0, r0)) ::
                                strongDB(xs, erase(x)::acc1,
                                acc2.updated(i, (oldHeadsUpdated, headListAlready._2)))
                        }
                    }
                // any other shape: keep it and remember its erased form
                case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)    
            }
                
        }
    
}


// Bit-coded lexing with simplification: takes the simplified derivative
// for every character of s and finally extracts the bits of the empty
// match. Throws if the final expression is not nullable.
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
    case head :: rest =>
      val simplified = bsimp(bder(head, r))
      blex_simp(simplified, rest)
    case Nil =>
      //println(asize(r))
      if (!bnullable(r)) throw new Exception("Not matched")
      else mkepsBC(r)
  }
  // Number of nodes in the syntax tree of r.
  // Every constructor of Rexp is handled. RECD, NTIMES and OPTIONAL each
  // count as one node on top of their body; previously they were missing,
  // so calling size on an expression such as WHILE_REGS or NREGS failed
  // with a MatchError.
  def size(r: Rexp) : Int = r match {
    case ZERO => 1
    case ONE => 1
    case CHAR(_) => 1
    case ANYCHAR => 1
    case NOT(r0) => 1 + size(r0)
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
    case ALTS(r1, r2) => 1 + size(r1) + size(r2)
    case STAR(r) => 1 + size(r)
    case RECD(_, r0) => 1 + size(r0)
    case NTIMES(_, r0) => 1 + size(r0)
    case OPTIONAL(r0) => 1 + size(r0)
  }

  // size of an annotated regular expression, measured on its erased form
  def asize(a: ARexp) = {
    val plain = erase(a)
    size(plain)
  }


// @arg(doc = "small tests")
// test expression used by small(): ((a* | (aa)*)*)*
val STARREG = ((STAR("a") | STAR("aa") ).%).%

@main
def small() = {
  // Small test: prints the simplified derivative of STARREG with respect
  // to the test string, its size, and the value the bit-coded lexer finds.
  val prog0 = """aaaaaaaaa"""
  println(s"test: $prog0")
//   println(lexing_simp(NOTREG, prog0))
//   val v = lex_simp(NOTREG, prog0.toList)
//   println(v)

//   val d =  (lex_simp(NOTREG, prog0.toList))
//   println(d)

  val derivative = bdersSimp(prog0, STARREG)
  println(erase(derivative))
  println(asize(derivative))

  val lexedValue = blexing_simp(STARREG, prog0)
  println(lexedValue)
//   println(vs.length)
//   println(vs)

  // val prog1 = """read  n; write n"""  
  // println(s"test: $prog1")
  // println(lexing_simp(WHILE_REGS, prog1))
}

// // Bigger Tests
// //==============

// // escapes strings and prints them out as "", "\n" and so on
// def esc(raw: String): String = {
//   import scala.reflect.runtime.universe._
//   Literal(Constant(raw)).toString
// }

// def escape(tks: List[(String, String)]) =
//   tks.map{ case (s1, s2) => (s1, esc(s2))}


// val prog2 = """
// write "Fib";
// read n;
// minus1 := 0;
// minus2 := 1;
// while n > 0 do {
//   temp := minus2;
//   minus2 := minus1 + minus2;
//   minus1 := temp;
//   n := n - 1
// };
// write "Result";
// write minus2
// """

// @arg(doc = "Fibonacci test")
// @main
// def fib() = {
//   println("lexing fib program")
//   println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
// }


// val prog3 = """
// start := 1000;
// x := start;
// y := start;
// z := start;
// while 0 < x do {
//  while 0 < y do {
//   while 0 < z do {
//     z := z - 1
//   };
//   z := start;
//   y := y - 1
//  };     
//  y := start;
//  x := x - 1
// }
// """

// @arg(doc = "Loops test")
// @main
// def loops() = {
//   println("lexing Loops")
//   println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
// }

// @arg(doc = "Email Test")
// @main
// def email() = {
//   val lower = "abcdefghijklmnopqrstuvwxyz"

//   val NAME = RECD("name", PLUS(RANGE(lower ++ "_.-")))
//   val DOMAIN = RECD("domain", PLUS(RANGE(lower ++ "-")))
//   val RE = RANGE(lower ++ ".")
//   val TOPLEVEL = RECD("top", (RE ~ RE) |
//                              (RE ~ RE ~ RE) | 
//                              (RE ~ RE ~ RE ~ RE) | 
//                              (RE ~ RE ~ RE ~ RE ~ RE) |
//                              (RE ~ RE ~ RE ~ RE ~ RE ~ RE))

//   val EMAIL = NAME ~ "@" ~ DOMAIN ~ "." ~ TOPLEVEL

//   println(lexing_simp(EMAIL, "christian.urban@kcl.ac.uk"))
// }


// @arg(doc = "All tests.")
// @main
// def all() = { small(); fib() ; loops() ; email() } 




// runs with amm2 and amm3