thys2/blexer2.sc
author Chengsong
Thu, 01 Dec 2022 08:49:19 +0000
changeset 629 96e009a446d5
parent 628 7af4e2420a8c
child 639 80cc6dc4c98b
permissions -rw-r--r--
abstract comments incorporated

//Strong Bsimp to obtain Antimirov's cubic bound

// A simple lexer inspired by work of Sulzmann & Lu
//==================================================
//
// Call the test cases with 
//
//   amm lexer.sc small
//   amm lexer.sc fib
//   amm lexer.sc loops
//   amm lexer.sc email
//
//   amm lexer.sc all



// regular expressions including records
abstract class Rexp 
case object ZERO extends Rexp
case object ONE extends Rexp
case object ANYCHAR extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
case class ALTSS(rs: List[Rexp]) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
case class STAR(r: Rexp) extends Rexp 
case class RECD(x: String, r: Rexp) extends Rexp  
case class NTIMES(n: Int, r: Rexp) extends Rexp
case class OPTIONAL(r: Rexp) extends Rexp
case class NOT(r: Rexp) extends Rexp
// records for extracting strings or tokens

// values  
abstract class Val
case object Failure extends Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
case class Ntime(vs: List[Val]) extends Val
case class Optionall(v: Val) extends Val
case class Nots(s: String) extends Val



abstract class Bit
case object Z extends Bit
case object S extends Bit


type Bits = List[Bit]

abstract class ARexp 
case object AZERO extends ARexp
case class AONE(bs: Bits) extends ARexp
case class ACHAR(bs: Bits, c: Char) extends ARexp
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
case class ANOT(bs: Bits, r: ARexp) extends ARexp
case class AANYCHAR(bs: Bits) extends ARexp

import scala.util.Try

trait Generator[+T] {
  self => // an alias for "this"
    def generate(): T

    def gen(n: Int) : List[T] = 
      if (n == 0) Nil else self.generate() :: gen(n - 1)

    def map[S](f: T => S): Generator[S] = new Generator[S] {
      def generate = f(self.generate())  
    }
    def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
      def generate = f(self.generate()).generate()
    }


}

// tests a property according to a given random generator
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
  for (_ <- 0 until amount) {
    val value = r.generate()
    assert(pred(value), s"Test failed for: $value")
  }
  println(s"Test passed $amount times")
}
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
  for (_ <- 0 until amount) {
    val valueR = r.generate()
    val valueS = s.generate()
    assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
  }
  println(s"Test passed $amount times")
}

// random integers
val integers = new Generator[Int] {
  val rand = new java.util.Random
  def generate() = rand.nextInt()
}

// random booleans
val booleans = integers.map(_ > 0)

// random integers in the range lo and high  
def range(lo: Int, hi: Int): Generator[Int] = 
  for (x <- integers) yield (lo + x.abs % (hi - lo)).abs

// random characters
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)  
val chars = chars_range('a', 'z')


def oneOf[T](xs: T*): Generator[T] = 
  for (idx <- range(0, xs.length)) yield xs(idx)

def single[T](x: T) = new Generator[T] {
  def generate() = x
}   

def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] = 
  for (x <- t; y <- u) yield (x, y)

// lists
def emptyLists = single(Nil) 

def nonEmptyLists : Generator[List[Int]] = 
  for (head <- integers; tail <- lists) yield head :: tail

def lists: Generator[List[Int]] = for {
  kind <- booleans
  list <- if (kind) emptyLists else nonEmptyLists
} yield list

def char_list(len: Int): Generator[List[Char]] = {
  if(len <= 0) single(Nil)
  else{
    for { 
      c <- chars_range('a', 'c')
      tail <- char_list(len - 1)
    } yield c :: tail
  }
}

def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString

def sampleString(r: Rexp) : List[String] = r match {
  case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
  case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
  case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
  case ONE => "" :: Nil
  case ZERO => Nil
  case CHAR(c) => c.toString :: Nil

}

def stringsFromRexp(r: Rexp) : List[String] = 
  breakIntoTerms(r).flatMap(r => sampleString(r))


// (simple) binary trees
trait Tree[T]
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
case class Leaf[T](x: T) extends Tree[T]

def leafs[T](t: Generator[T]): Generator[Leaf[T]] = 
  for (x <- t) yield Leaf(x)

def inners[T](t: Generator[T]): Generator[Inner[T]] = 
  for (l <- trees(t); r <- trees(t)) yield Inner(l, r)

def trees[T](t: Generator[T]): Generator[Tree[T]] = 
  for (kind <- range(0, 2);  
       tree <- if (kind == 0) leafs(t) else inners(t)) yield tree

// regular expressions

// generates random leaf-regexes; prefers CHAR-regexes
def leaf_rexp() : Generator[Rexp] =
  for (kind <- range(0, 5);
       c <- chars_range('a', 'd')) yield
kind match {
  case 0 => ZERO
  case 1 => ONE
  case _ => CHAR(c) 
}

// generates random inner regexes with maximum depth d
def inner_rexp(d: Int) : Generator[Rexp] =
  for (kind <- range(0, 3);
       l <- rexp(d); 
       r <- rexp(d))
yield kind match {
  case 0 => ALTS(l, r)
  case 1 => SEQ(l, r)
  case 2 => STAR(r)
}

// generates random regexes with maximum depth d;
// prefers inner regexes in 2/3 of the cases
def rexp(d: Int = 100): Generator[Rexp] = 
  for (kind <- range(0, 3);
       r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r


// some test functions for rexps
def height(r: Rexp) : Int = r match {
  case ZERO => 1
  case ONE => 1
  case CHAR(_) => 1
  case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
  case SEQ(r1, r2) =>  1 + List(height(r1), height(r2)).max
  case STAR(r) => 1 + height(r)
}

// def size(r: Rexp) : Int = r match {
//   case ZERO => 1
//   case ONE => 1
//   case CHAR(_) => 1
//   case ALTS(r1, r2) => 1 + size(r1) + size(r2)
//   case SEQ(r1, r2) =>  1 + size(r1) + size(r2)
//   case STAR(r) => 1 + size(r) 
// }

// randomly subtracts 1 or 2 from the STAR case
def size_faulty(r: Rexp) : Int = r match {
  case ZERO => 1
  case ONE => 1
  case CHAR(_) => 1
  case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
  case SEQ(r1, r2) =>  1 + size_faulty(r1) + size_faulty(r2)
  case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
}



// some convenience for typing in regular expressions

def charlist2rexp(s : List[Char]): Rexp = s match {
  case Nil => ONE
  case c::Nil => CHAR(c)
  case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp(s : String) : Rexp = 
  charlist2rexp(s.toList)

  implicit def RexpOps(r: Rexp) = new {
    def | (s: Rexp) = ALTS(r, s)
    def % = STAR(r)
    def ~ (s: Rexp) = SEQ(r, s)
  }

  implicit def stringOps(s: String) = new {
    def | (r: Rexp) = ALTS(s, r)
    def | (r: String) = ALTS(s, r)
    def % = STAR(s)
    def ~ (r: Rexp) = SEQ(s, r)
    def ~ (r: String) = SEQ(s, r)
    def $ (r: Rexp) = RECD(s, r)
  }

  def nullable(r: Rexp) : Boolean = r match {
    case ZERO => false
    case ONE => true
    case CHAR(_) => false
    case ANYCHAR => false
    case ALTS(r1, r2) => nullable(r1) || nullable(r2)
    case ALTSS(rs) => rs.exists(nullable)
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
    case STAR(_) => true
    case RECD(_, r1) => nullable(r1)
    case NTIMES(n, r) => if (n == 0) true else nullable(r)
    case OPTIONAL(r) => true
    case NOT(r) => !nullable(r)
  }

  def der(c: Char, r: Rexp) : Rexp = r match {
    case ZERO => ZERO
    case ONE => ZERO
    case CHAR(d) => if (c == d) ONE else ZERO
    case ANYCHAR => ONE 
    case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
    case ALTSS(rs) => ALTSS(rs.map(der(c, _)))
    case SEQ(r1, r2) => 
      if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
      else SEQ(der(c, r1), r2)
    case STAR(r) => SEQ(der(c, r), STAR(r))
    case RECD(_, r1) => der(c, r1)
    case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
    case OPTIONAL(r) => der(c, r)
    case NOT(r) =>  NOT(der(c, r))
  }

  def ders(s: List[Char], r: Rexp) : Rexp = s match {
    case Nil => r
    case c :: cs => ders(cs, der(c, r))
  }

  def ders_simp(s: List[Char], r: Rexp) : Rexp = s match {
    case Nil => simp(r)
    case c :: cs => ders_simp(cs, simp(der(c, r)))
  }


  def simp2(r: Rexp) : Rexp = r match {
    case SEQ(r1, r2) => 
      (simp2(r1), simp2(r2)) match {
        case (ZERO, _) => ZERO
        case (_, ZERO) => ZERO
        case (ONE, r2s) => r2s
        case (r1s, ONE) => r1s
        case (r1s, r2s) => 
          SEQ(r1s, r2s)
      }
        case ALTS(r1, r2) => 
          val r1r2s = fltsplain(simp2(r1) :: simp2(r2) :: Nil).distinct 
          r1r2s match {
            case Nil => ZERO
            case r :: Nil => r
            case r1 :: r2 :: rs => 
              ALTSS(r1 :: r2 :: rs)
          }
            case ALTSS(rs) => 
              // println(rs)
              (fltsplain(rs.map(simp2))).distinct match {
                case Nil => ZERO
                case r :: Nil => r
                case r1 :: r2 :: rs =>
                  ALTSS(r1 :: r2 :: rs)
              }
                case r => r
  }


  def simp(r: Rexp) : Rexp = r match {
    case SEQ(r1, r2) => 
      (simp(r1), simp(r2)) match {
        case (ZERO, _) => ZERO
        case (_, ZERO) => ZERO
        case (ONE, r2s) => r2s
        case (r1s, ONE) => r1s
        case (r1s, r2s) => SEQ(r1s, r2s)
      }
        case ALTS(r1, r2) => {
          (simp(r1), simp(r2)) match {
            case (ZERO, r2s) => r2s
            case (r1s, ZERO) => r1s
            case (r1s, r2s) =>
              if(r1s == r2s) r1s else ALTS(r1s, r2s)
          }
        }
            case r => r
  }

  def fltsplain(rs: List[Rexp]) : List[Rexp] = rs match {
    case Nil => Nil
    case ZERO :: rs => fltsplain(rs)
    case ALTS(r1, r2) :: rs => r1 :: r2 :: fltsplain(rs) 
    case ALTSS(rs) :: rs1 => rs ::: fltsplain(rs1)
    case r :: rs => r :: fltsplain(rs)
  }




  def matcher(s: String, r: Rexp) : Boolean = 
    nullable(ders(s.toList, r))

  def simp_matcher(s: String, r: Rexp) : Boolean = 
    nullable(ders_simp(s.toList, r))


  // extracts a string from a value
  def flatten(v: Val) : String = v match {
    case Empty => ""
    case Chr(c) => c.toString
    case Left(v) => flatten(v)
    case Right(v) => flatten(v)
    case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
    case Stars(vs) => vs.map(flatten).mkString
    case Ntime(vs) => vs.map(flatten).mkString
    case Optionall(v) => flatten(v)
    case Rec(_, v) => flatten(v)
  }


  // extracts an environment from a value;
  // used for tokenising a string
  def env(v: Val) : List[(String, String)] = v match {
    case Empty => Nil
    case Chr(c) => Nil
    case Left(v) => env(v)
    case Right(v) => env(v)
    case Sequ(v1, v2) => env(v1) ::: env(v2)
    case Stars(vs) => vs.flatMap(env)
    case Ntime(vs) => vs.flatMap(env)
    case Rec(x, v) => (x, flatten(v))::env(v)
    case Optionall(v) => env(v)
    case Nots(s) => ("Negative", s) :: Nil
  }


  // The injection and mkeps part of the lexer
  //===========================================

  def mkeps(r: Rexp) : Val = r match {
    case ONE => Empty
    case ALTS(r1, r2) => 
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
    case STAR(r) => Stars(Nil)
    case RECD(x, r) => Rec(x, mkeps(r))
    case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
    case OPTIONAL(r) => Optionall(Empty)
    case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")  
    else Nots("")//Nots(s.reverse.toString)
    //   case NOT(ZERO) => Empty
    //   case NOT(CHAR(c)) => Empty
    //   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
    //   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
    //   case NOT(STAR(r)) => Stars(Nil) 

  }

  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
    case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
    case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
    case (CHAR(d), Empty) => Chr(c) 
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
    case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
    case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
    case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
    case (ANYCHAR, Empty) => Chr(c)
  }

  // some "rectification" functions for simplification




  // The Lexing Rules for the WHILE Language

  // bnullable function: tests whether the aregular 
  // expression can recognise the empty string
  def bnullable (r: ARexp) : Boolean = r match {
    case AZERO => false
    case AONE(_) => true
    case ACHAR(_,_) => false
    case AALTS(_, rs) => rs.exists(bnullable)
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
    case ASTAR(_, _) => true
    case ANOT(_, rn) => !bnullable(rn)
  }

  def mkepsBC(r: ARexp) : Bits = r match {
    case AONE(bs) => bs
    case AALTS(bs, rs) => {
      val n = rs.indexWhere(bnullable)
      bs ++ mkepsBC(rs(n))
    }
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
    case ASTAR(bs, r) => bs ++ List(Z)
    case ANOT(bs, rn) => bs
  }


  def bder(c: Char, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(_) => AZERO
    case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
    case ASEQ(bs, r1, r2) => 
      if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
      else ASEQ(bs, bder(c, r1), r2)
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
    case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
    case AANYCHAR(bs) => AONE(bs)
  } 

  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
    case AZERO => AZERO
    case AONE(cs) => AONE(bs ++ cs)
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
    case ANOT(cs, r) => ANOT(bs ++ cs, r)
  }


  def internalise(r: Rexp) : ARexp = r match {
    case ZERO => AZERO
    case ONE => AONE(Nil)
    case CHAR(c) => ACHAR(Nil, c)
    //case PRED(f) => APRED(Nil, f)
    case ALTS(r1, r2) => 
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
      // case ALTS(r1::rs) => {
      //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
      //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
      // }
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
    case STAR(r) => ASTAR(Nil, internalise(r))
    case RECD(x, r) => internalise(r)
    case NOT(r) => ANOT(Nil, internalise(r))
    case ANYCHAR => AANYCHAR(Nil)
  }


  def bsimp(r: ARexp): ARexp = 
  {
    r match {
      case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
      }
        case AALTS(bs1, rs) => {
          val rs_simp = rs.map(bsimp(_))
          val flat_res = flats(rs_simp)
          val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
          dist_res match {
            case Nil => AZERO
            case s :: Nil => fuse(bs1, s)
            case rs => AALTS(bs1, rs)  
          }

        }
            case r => r
    }
  }
  def strongBsimp(r: ARexp): ARexp =
  {
    r match {
      case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
      }
        case AALTS(bs1, rs) => {
          val rs_simp = rs.map(strongBsimp(_))
          val flat_res = flats(rs_simp)
          val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
          dist_res match {
            case Nil => AZERO
            case s :: Nil => fuse(bs1, s)
            case rs => AALTS(bs1, rs)  
          }

        }
            case r => r
    }
  }

  def strongBsimp5(r: ARexp): ARexp =
  {
    // println("was this called?")
    r match {
      case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
      }
        case AALTS(bs1, rs) => {
          // println("alts case")
          val rs_simp = rs.map(strongBsimp5(_))
          val flat_res = flats(rs_simp)
          var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
          // var dist2_res = distinctBy5(dist_res)
          // while(dist_res != dist2_res){
          //   dist_res = dist2_res
          //   dist2_res = distinctBy5(dist_res)
          // }
          (dist_res) match {
            case Nil => AZERO
            case s :: Nil => fuse(bs1, s)
            case rs => AALTS(bs1, rs)  
          }
        }
            case r => r
    }
  }

  def strongBsimp6(r: ARexp): ARexp =
  {
    // println("was this called?")
    r match {
      case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
        //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
      }
        case AALTS(bs1, rs) => {
          // println("alts case")
          val rs_simp = rs.map(strongBsimp6(_))
          val flat_res = flats(rs_simp)
          var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
          (dist_res) match {
            case Nil => AZERO
            case s :: Nil => fuse(bs1, s)
            case rs => AALTS(bs1, rs)  
          }
        }
        //(erase(r0))) => AONE(bs)
            case r => r
    }
  }


    def atMostEmpty(r: Rexp) : Boolean = r match {
      case ZERO => true
      case ONE => true
      case STAR(r) => atMostEmpty(r)
      case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
      case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
      case CHAR(_) => false
    }


    def isOne(r: Rexp) : Boolean = r match {
      case ONE => true
      case SEQ(r1, r2) => isOne(r1) && isOne(r2)
      case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
      case STAR(r0) => atMostEmpty(r0)
      case CHAR(c) => false
      case ZERO => false
    }

  def distinctWith(rs: List[ARexp], 
    pruneFunction: (ARexp, Set[Rexp]) => ARexp, 
    acc: Set[Rexp] = Set()) : List[ARexp] = 
      rs match{
        case Nil => Nil
        case r :: rs => 
          if(acc(erase(r)))
            distinctWith(rs, pruneFunction, acc)
          else {
            val pruned_r = pruneFunction(r, acc)
            pruned_r :: 
            distinctWith(rs, 
              pruneFunction, 
              turnIntoTerms(erase(pruned_r)) ++: acc
              )
          }
      }

    //r = r' ~ tail' : If tail' matches tail => returns r'
    def removeSeqTail(r: Rexp, tail: Rexp) : Rexp = 
      if (r == tail)
        ONE
      else {
        r match {
          case SEQ(r1, r2) => 
            if(r2 == tail) 
              r1
            else
              ZERO
          case r => ZERO
        }
      }

    def prune(r: ARexp, acc: Set[Rexp]) : ARexp = r match{
      case AALTS(bs, rs) => rs.map(r => prune(r, acc)).filter(_ != AZERO) match
      {
        //all components have been removed, meaning this is effectively a duplicate
        //flats will take care of removing this AZERO 
        case Nil => AZERO
        case r::Nil => fuse(bs, r)
        case rs1 => AALTS(bs, rs1)
      }
      case ASEQ(bs, r1, r2) => 
        //remove the r2 in (ra + rb)r2 to identify the duplicate contents of r1
        prune(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match {
          //after pruning, returns 0
          case AZERO => AZERO
          //after pruning, got r1'.r2, where r1' is equal to 1
          case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2)
          //assemble the pruned head r1p with r2
          case r1p => ASEQ(bs, r1p, r2)
        }
        //this does the duplicate component removal task
      case r => if(acc(erase(r))) AZERO else r
    }

    //a stronger version of simp
    def bsimpStrong(r: ARexp): ARexp =
    {
      r match {
        case ASEQ(bs1, r1, r2) => (bsimpStrong(r1), bsimpStrong(r2)) match {
          //normal clauses same as simp
        case (AZERO, _) => AZERO
        case (_, AZERO) => AZERO
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
        //bs2 can be discarded
        case (r1s, AONE(bs2)) => fuse(bs1, r1s) //assert bs2 == Nil
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
        }
        case AALTS(bs1, rs) => {
          //distinctBy(flat_res, erase)
          distinctWith(flats(rs.map(bsimpStrong(_))), prune) match {
            case Nil => AZERO
            case s :: Nil => fuse(bs1, s)
            case rs => AALTS(bs1, rs)  
          }
        }
        //stars that can be treated as 1
        case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
        case r => r
      }
    }


    def bders (s: List[Char], r: ARexp) : ARexp = s match {
      case Nil => r
      case c::s => bders(s, bder(c, r))
    }

    def flats(rs: List[ARexp]): List[ARexp] = rs match {
      case Nil => Nil
      case AZERO :: rs1 => flats(rs1)
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
      case r1 :: rs2 => r1 :: flats(rs2)
    }

    def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
      case Nil => Nil
      case (x::xs) => {
        val res = f(x)
        if (acc.contains(res)) distinctBy(xs, f, acc)  
        else x::distinctBy(xs, f, res::acc)
      }
    } 


    def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
      r match {
        case ASEQ(bs, r1, r2) => 
          val termsTruncated = allowableTerms.collect(rt => rt match {
            case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2)) 
          })
          val pruned : ARexp = pruneRexp(r1, termsTruncated)
          pruned match {
            case AZERO => AZERO
            case AONE(bs1) => fuse(bs ++ bs1, r2)
            case pruned1 => ASEQ(bs, pruned1, r2)
          }

            case AALTS(bs, rs) => 
              //allowableTerms.foreach(a => println(shortRexpOutput(a)))        
              val rsp = rs.map(r => 
                  pruneRexp(r, allowableTerms)
                  )
                    .filter(r => r != AZERO)
                    rsp match {
                      case Nil => AZERO
                      case r1::Nil => fuse(bs, r1)
                      case rs1 => AALTS(bs, rs1)
                    }
                      case r => 
                        if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
      }
    }

    def oneSimp(r: Rexp) : Rexp = r match {
      case SEQ(ONE, r) => r
      case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
      case r => r//assert r != 0 

    }


    def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
      case Nil => Nil
      case x :: xs =>
        //assert(acc.distinct == acc)
        val erased = erase(x)
        if(acc.contains(erased))
          distinctBy4(xs, acc)
        else{
          val addToAcc =  breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
          //val xp = pruneRexp(x, addToAcc)
          pruneRexp(x, addToAcc) match {
            case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
            case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
          }
        }
    }

    // fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
    //   where
    // "pAKC_aux rsa r ctx = (if (seqFold (SEQ (erase r) ( ctx) )) ⊆ (seqFold (erase (AALTs [] rsa))) then AZERO else
    //                           case r of (ASEQ bs r1 r2) ⇒ 
    //                             bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ  (erase r2) ( ctx) )) r2   |
    //                                     (AALTs bs rs) ⇒ 
    //                             bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) )    |
    //                                     r             ⇒ r

    // def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
    //   case r::Nil => SEQ(r, acc)
    //   case Nil => acc
    //   case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
    // }


    //the "fake" Language interpretation: just concatenates!
    def seqFold(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
      case Nil => acc
      case r :: rs1 => 
        // if(acc == ONE) 
        //   seqFold(r, rs1) 
        // else
        seqFold(SEQ(acc, r), rs1)
    }

    def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
    def rsprint(rs: Iterable[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))

    def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
    def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))

    def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
      // println("pakc")
      // println(shortRexpOutput(erase(r)))
      // println("acc")
      // rsprint(acc)
      // println("ctx---------")
      // rsprint(ctx)
      // println("ctx---------end")
      // rsprint(breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp))

      if (breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
        AZERO
      }
      else{
        r match {
          case ASEQ(bs, r1, r2) => 
            (pAKC(acc, r1, erase(r2) :: ctx)) match{
              case AZERO => 
                AZERO
              case AONE(bs1) => 
                fuse(bs1, r2)
              case r1p => 
                ASEQ(bs, r1p, r2)
            }
              case AALTS(bs, rs0) => 
                // println("before pruning")
                // println(s"ctx is ")
                // ctx.foreach(r => println(shortRexpOutput(r)))
                // println(s"rs0 is ")
                // rs0.foreach(r => println(shortRexpOutput(erase(r))))
                // println(s"acc is ")
                // acc.foreach(r => println(shortRexpOutput(r)))
                rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
                  case Nil => 
                    // println("after pruning Nil")
                    AZERO
                  case r :: Nil => 
                    // println("after pruning singleton")
                    // println(shortRexpOutput(erase(r)))
                    r 
                  case rs0p => 
                    // println("after pruning non-singleton")
                    AALTS(bs, rs0p)
                }
                  case r => r
        }
      }
    }

    def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
      case Nil => 
        Nil
      case x :: xs => {
        val erased = erase(x)
        if(acc.contains(erased)){
          distinctBy5(xs, acc)
        }
        else{
          val pruned = pAKC(acc, x, Nil)
          val newTerms = breakIntoTerms(erase(pruned))
          pruned match {
            case AZERO => 
              distinctBy5(xs, acc)
            case xPrime => 
              xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
          }
        }
      }
    }


    def isOne1(r: Rexp) : Boolean = r match {
      case ONE => true
      case SEQ(r1, r2) => false//isOne1(r1) && isOne1(r2)
      case ALTS(r1, r2) => false//(isOne1(r1) || isOne1(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
      case STAR(r0) => false//atMostEmpty(r0)
      case CHAR(c) => false
      case ZERO => false

    }

    def turnIntoTerms(r: Rexp): List[Rexp] = r match {
      case SEQ(r1, r2)  => 
        turnIntoTerms(r1).flatMap(r11 => furtherSEQ(r11, r2))
          case ALTS(r1, r2) => turnIntoTerms(r1) ::: turnIntoTerms(r2)
          case ZERO => Nil
          case _ => r :: Nil
    }

    def furtherSEQ(r11: Rexp, r2: Rexp) : List[Rexp] = {
      if(r11 == ONE)
        turnIntoTerms(r2)
      else
        SEQ(r11, r2) :: Nil
    }

    def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
      turnIntoTerms((seqFold(erase(r), ctx)))
        .toSet
    }

    def attachCtxcc(r: Rexp, ctx: List[Rexp]) : Set[Rexp] =
      turnIntoTerms(seqFold(r, ctx)).toSet

    def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, 
      subseteqPred: (C, C) => Boolean) : Boolean = {
        subseteqPred(f(a, b), c)
    }
      def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = 
        //rs1 \subseteq rs2
      rs1.forall(rs2.contains)

      def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp] = Nil) : ARexp = {
        if (ABIncludedByC(a = r, b = ctx, c = acc, 
          f = attachCtx, subseteqPred = rs1_subseteq_rs2)) 
          {
            AZERO
          }
          else{
            r match {
              case ASEQ(bs, r1, r2) => (prune6(acc, r1, erase(r2) :: ctx)) match{
                case AZERO => 
                  AZERO
                case AONE(bs1) => //when r1 becomes 1, chances to further prune r2
                  prune6(acc, fuse(bs1, r2), ctx)
                case r1p => 
                  ASEQ(bs, r1p, r2)
              }
                case AALTS(bs, rs0) => 
                  rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
                    case Nil => 
                      AZERO
                    case r :: Nil => 
                      fuse(bs, r) 
                    case rs0p => 
                      AALTS(bs, rs0p)
                  }
                    case r => r
            }
          }
      }

      def prune6cc(acc: Set[Rexp], r: Rexp, ctx: List[Rexp]) : Rexp = {
        if (ABIncludedByC(a = r, b = ctx, c = acc, 
          f = attachCtxcc, subseteqPred = rs1_subseteq_rs2)) 
          {//acc.flatMap(breakIntoTerms
            ZERO
          }
          else{
            r match {
              case SEQ(r1, r2) => 
                (prune6cc(acc, r1, r2 :: ctx)) match{
                  case ZERO => 
                    ZERO
                  case ONE => 
                    r2
                  case r1p => 
                    SEQ(r1p, r2)
                }
                  case ALTS(r1, r2) => 
                    List(r1, r2).map(r => prune6cc(acc, r, ctx)).filter(_ != AZERO) match {
                      case Nil => 
                        ZERO
                      case r :: Nil => 
                        r 
                      case ra :: rb :: Nil => 
                        ALTS(ra, rb)
                    }
                      case r => r
            }
          }
      }

      def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) : 
      List[ARexp] = xs match {
        case Nil => 
          Nil
        case x :: xs => {
          val erased = erase(x)
          if(acc.contains(erased)){
            distinctBy6(xs, acc)
          }
          else{
            val pruned = prune6(acc, x)
            val newTerms = turnIntoTerms(erase(pruned))
            pruned match {
              case AZERO => 
                distinctBy6(xs, acc)
              case xPrime => 
                xPrime :: distinctBy6(xs, newTerms ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
            }
          }
        }
      }

      def distinctByacc(xs: List[Rexp], acc: Set[Rexp] = Set()) : Set[Rexp] = xs match {
        case Nil => 
          acc
        case x :: xs => {
          if(acc.contains(x)){
            distinctByacc(xs, acc)
          }
          else{
            val pruned = prune6cc(acc, x, Nil)
            val newTerms = turnIntoTerms(pruned)
            pruned match {
              case ZERO => 
                distinctByacc(xs, acc)
              case xPrime => 
                distinctByacc(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
            }
          }
        }
      }

      def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
        case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
        case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
        case ZERO => Nil
        case _ => r::Nil
      }

      def flatsIntoTerms(r: Rexp) : List[Rexp] = r match {
        //case SEQ(r1, r2)  => flatsIntoTerms(r1).map(r11 => SEQ(r11, r2))
        case ALTS(r1, r2) => flatsIntoTerms(r1) ::: flatsIntoTerms(r2)
        case ZERO => Nil
        case _ => r::Nil
      }


      def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
        case (ONE, bs) => (Empty, bs)
        case (CHAR(f), bs) => (Chr(f), bs)
        case (ALTS(r1, r2), Z::bs1) => {
          val (v, bs2) = decode_aux(r1, bs1)
          (Left(v), bs2)
        }
        case (ALTS(r1, r2), S::bs1) => {
          val (v, bs2) = decode_aux(r2, bs1)
          (Right(v), bs2)
        }
        case (SEQ(r1, r2), bs) => {
          val (v1, bs1) = decode_aux(r1, bs)
          val (v2, bs2) = decode_aux(r2, bs1)
          (Sequ(v1, v2), bs2)
        }
        case (STAR(r1), S::bs) => {
          val (v, bs1) = decode_aux(r1, bs)
          //(v)
          val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
          (Stars(v::vs), bs2)
        }
        case (STAR(_), Z::bs) => (Stars(Nil), bs)
        case (RECD(x, r1), bs) => {
          val (v, bs1) = decode_aux(r1, bs)
          (Rec(x, v), bs1)
        }
        case (NOT(r), bs) => (Nots(r.toString), bs)
      }

      def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
        case (v, Nil) => v
        case _ => throw new Exception("Not decodable")
      }



      def blexing_simp(r: Rexp, s: String) : Val = {
        val bit_code = blex_simp(internalise(r), s.toList)
        decode(r, bit_code)
      }
      def simpBlexer(r: Rexp, s: String) : Val = {
        Try(blexing_simp(r, s)).getOrElse(Failure)
      }

      def strong_blexing_simp(r: Rexp, s: String) : Val = {
        decode(r, strong_blex_simp(internalise(r), s.toList))
      }

      def strong_blexing_simp5(r: Rexp, s: String) : Val = {
        decode(r, strong_blex_simp5(internalise(r), s.toList))
      }




      //def blexerStrong(r: ARexp, s: String) : Val = {
      //  Try(strong_blexing_simp(r, s)).getOrElse(Failure)
      //}
      def strongBlexer5(r: Rexp, s: String): Val = {
        Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
      }

      def strongBlexer(r: Rexp, s: String) : Option[Val] = {
        Try(Some(decode(r, strong_blex_simp(internalise(r), s.toList)))).getOrElse(None)
      }
      def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
        case Nil => {
          if (bnullable(r)) {
            mkepsBC(r)
          }
          else 
            throw new Exception("Not matched")
        }
        case c::cs => {
          strong_blex_simp(strongBsimp(bder(c, r)), cs)      
        }
      }

      def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
        case Nil => {
          if (bnullable(r)) {
            val bits = mkepsBC(r)
            bits
          }
          else 
            throw new Exception("Not matched")
        }
        case c::cs => {
          val der_res = bder(c,r)
          val simp_res = strongBsimp5(der_res)  
          strong_blex_simp5(simp_res, cs)      
        }
      }


      def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
        case Nil => r
        case c::s => 
          //println(erase(r))
          bders_simp(s, bsimp(bder(c, r)))
      }

      def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
        case Nil => r
        case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
      }
      def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
        case Nil => r
        case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
      }
      //Conjecture: [| bdersStrong(s, r) |] = O([| r |]^3)
      def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
        case Nil => r
        case c::s => bdersStrong(s, bsimpStrong(bder(c, r)))
      }
      def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))

      //def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
      //  case Nil => r 
      //  case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
      //}

      def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))

      def erase(r:ARexp): Rexp = r match{
        case AZERO => ZERO
        case AONE(_) => ONE
        case ACHAR(bs, c) => CHAR(c)
        case AALTS(bs, Nil) => ZERO
        case AALTS(bs, a::Nil) => erase(a)
        case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
        case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
        case ASTAR(cs, r)=> STAR(erase(r))
        case ANOT(bs, r) => NOT(erase(r))
        case AANYCHAR(bs) => ANYCHAR
      }


      def allCharSeq(r: Rexp) : Boolean = r match {
        case CHAR(c) => true
        case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
        case _ => false
      }

      def flattenSeq(r: Rexp) : String = r match {
        case CHAR(c) => c.toString
        case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
        case _ => throw new Error("flatten unflattenable rexp")
      } 

      def shortRexpOutput(r: Rexp) : String = r match {
        case CHAR(c) => c.toString
        case ONE => "1"
        case ZERO => "0"
        case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
        case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
        case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
        case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
        case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
        //case RTOP => "RTOP"
      }


      def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
        case Nil => {
          if (bnullable(r)) {
            val bits = mkepsBC(r)
            bits
          }
          else 
            throw new Exception("Not matched")
        }
        case c::cs => {
          val der_res = bder(c,r)
          val simp_res = bsimp(der_res)  
          blex_simp(simp_res, cs)      
        }
      }




      def size(r: Rexp) : Int = r match {
        case ZERO => 1
        case ONE => 1
        case CHAR(_) => 1
        case ANYCHAR => 1
        case NOT(r0) => 1 + size(r0)
        case SEQ(r1, r2) => 1 + size(r1) + size(r2)
        case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
        case STAR(r) => 1 + size(r)
      }

      def asize(a: ARexp) = size(erase(a))

      //pder related
      type Mon = (Char, Rexp)
      type Lin = Set[Mon]

      def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
        case ZERO => Set()
        case ONE => rs
        case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
      }
      def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
        case ZERO => Set()
        // case ONE => l
        case t => l.map( m => m._2 match 
        {
          case ZERO => m 
          case ONE => (m._1, t) 
          case p => (m._1, SEQ(p, t)) 
        }  

        )
      }
      def cir_prodList(l : List[Mon], t: Rexp): List[Mon] = t match {
        case ZERO => Nil
        case ONE => l
        case t => l.map(m => m._2 match
        {
          case ZERO => m
          case ONE => (m._1, t)
          case p => (m._1, SEQ(p, t))
        }
        )

      }
      def lfList(r: Rexp) : List[Mon] = r match {
        case ZERO => Nil
        case ONE => Nil
        case CHAR(f) => {
          (f, ONE) :: Nil
        }
        case ALTS(r1, r2) => {
          lfList(r1) ++ lfList(r2)
        }
        case STAR(r1) => cir_prodList(lfList(r1), STAR(r1))
        case SEQ(r1, r2) => {
          if(nullable(r1))
            cir_prodList(lfList(r1), r2) ++ lfList(r2)
          else
            cir_prodList(lfList(r1), r2)
        }
      }
      def lfprint(ls: Lin) = ls.foreach(m =>{
        println(m._1)
        rprint(m._2)
      })
    def lf(r: Rexp): Lin = r match {
      case ZERO => Set()
      case ONE => Set()
      case CHAR(f) => {
        //val Some(c) = alphabet.find(f) 
        Set((f, ONE))
      }
      case ALTS(r1, r2) => {
        lf(r1 ) ++ lf(r2)
      }
      case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
      case SEQ(r1, r2) =>{
        if (nullable(r1))
          cir_prod(lf(r1), r2) ++ lf(r2)
        else
          cir_prod(lf(r1), r2) 
      }
    }
    def lfs(r: Set[Rexp]): Lin = {
      r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
    }

    def pder(x: Char, t: Rexp): Set[Rexp] = {
      val lft = lf(t)
      (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
    }
    def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
      case x::xs => pders(xs, pder(x, t))
      case Nil => Set(t)
    }
    def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
      case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
      case Nil => ts 
    }
    def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = 
      ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
    def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
    //all implementation of partial derivatives that involve set union are potentially buggy
    //because they don't include the original regular term before they are pdered.
    //now only pderas is fixed.
    def pderas(t: Set[Rexp], d: Int): Set[Rexp] = 
      if(d > 0) 
        pderas(lfs(t).map(mon => mon._2), d - 1) ++ t 
      else 
        lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
  def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
  def awidth(r: Rexp) : Int = r match {
    case CHAR(c) => 1
    case SEQ(r1, r2) => awidth(r1) + awidth(r2)
    case ALTS(r1, r2) => awidth(r1) + awidth(r2)
    case ONE => 0
    case ZERO => 0
    case STAR(r) => awidth(r)
  }
  //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
  //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
  def o(r: Rexp) = if (nullable(r)) ONE else ZERO
  //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
  def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
    case ZERO => Set[Rexp]()
    case ONE => Set[Rexp]()
    case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
    case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
    case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
    case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
  }
  def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
    case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
    case Nil => ts   
  }
  def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)



  def starPrint(r: Rexp) : Unit = r match {

    case SEQ(head, rstar) =>
      println(shortRexpOutput(head) ++ "~STARREG")
    case STAR(rstar) =>
      println("STARREG")
    case ALTS(r1, r2) =>  
      println("(")
      starPrint(r1)
      println("+")
      starPrint(r2)
      println(")")
    case ZERO => println("0")
  }

  // @arg(doc = "small tests")
  def n_astar_list(d: Int) : Rexp = {
    if(d == 0) STAR("a") 
    else ALTS(STAR("a" * d), n_astar_list(d - 1))
  }
  def n_astar_alts(d: Int) : Rexp = d match {
    case 0 => n_astar_list(0)
    case d => STAR(n_astar_list(d))
  }
  def n_astar_alts2(d: Int) : Rexp = d match {
    case 0 => n_astar_list(0)
    case d => STAR(STAR(n_astar_list(d)))
  }
  def n_astar_aux(d: Int) = {
    if(d == 0) n_astar_alts(0)
    else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
  }

  def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
  //val STARREG = n_astar(3)
  // ( STAR("a") | 
  //                  ("a" | "aa").% | 
  //                 ( "a" | "aa" | "aaa").% 
  //                 ).%
  //( "a" | "aa" | "aaa" | "aaaa").% |
  //( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").% 
  (((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%

  // @main

  def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
    (a, b) => b * a /
    Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
  }

  def small() = {
    //val pderSTAR = pderUNIV(STARREG)

    //val refSize = pderSTAR.map(size(_)).sum
    for(n <- 5 to 5){
      val STARREG = n_astar_alts2(n)
      val iMax = (lcm((1 to n).toList))
      for(i <- 0 to iMax ){// 100, 400, 800, 840, 841, 900 
        val prog0 = "a" * i
        //println(s"test: $prog0")
        print(i)
        print(" ")
        // print(i)
        // print(" ")
        println(asize(bders_simp(prog0.toList, internalise(STARREG))))
        // println(asize(bdersStrong(prog0.toList, internalise(STARREG))))
      }

    }
  }
  // small()

  def aaa_star() = {
    val r = STAR(("a" | "aa"))
    for(i <- 0 to 20) {
      val prog = "a" * i
      print(i+" ")
      println(asize(bders_simp(prog.toList, internalise(r))))
      //println(size(ders_simp(prog.toList, r)))
    }
  }
  // aaa_star()

  def matching_ways_counting(n: Int): Int = 
  {
    if(n == 0) 1
    else if(n == 1) 2
    else (for(i <- 1 to n - 1) 
      yield matching_ways_counting(i) * matching_ways_counting(n - i) ).sum + (n + 1)
  }
  def naive_matcher() {
    val r = STAR(STAR("a") ~ STAR("a"))

    // for(i <- 0 to 10) {
    //   val s = "a" * i 
    //   val t1 = System.nanoTime
    //   matcher(s, r)
    //   val duration = (System.nanoTime - t1) / 1e9d
    //   print(i)
    //   print(" ")
    //   // print(duration)
    //   // print(" ")
    //   (aprint(bders_simp(s.toList, internalise(r))))
    //   //print(size(ders_simp(s.toList, r)))
    //   println()
    // }
    for(i <- 1 to 3) {
      val s = "a" * i
      val t1 = System.nanoTime
      val derssimp_result = ders_simp(s.toList, r)
      println(i)
      println(matching_ways_counting(i))
      println(size(derssimp_result))
      rprint(derssimp_result)
    }

  }
  naive_matcher()
  //single(SEQ(SEQ(STAR(CHAR('b')),STAR(STAR(SEQ(CHAR('a'),CHAR('b'))))),
  //  SEQ(SEQ(CHAR('b'),STAR(ALTS(CHAR('a'),ONE))),ONE)))


  //STAR(SEQ(ALTS(CHAR(b),STAR(CHAR(b))),ALTS(ALTS(ONE,CHAR(b)),SEQ(CHAR(c),ONE))))
  def generator_test() {

    test(single(STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))), 1) { (r: Rexp) => 
      // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
      val ss = Set("ab")//stringsFromRexp(r)
      val boolList = ss.map(s => {
        //val bdStrong = bdersStrong(s.toList, internalise(r))
        val bdStrong6 = bdersStrong6(s.toList, internalise(r))
        val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
        val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
        val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
        (rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList)) ||
          bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
          rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken))//|| bdStrong6Set.size <= pdersSetBroken.size

      })
      //!boolList.exists(b => b == false)
      !boolList.exists(b => b == false)
      }
    }
    // small()
    // generator_test()



    //STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))),
    //  CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))),
    //  CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
    //counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b)))))
    //counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b))))

    //new ce1 : STAR(SEQ(ALTS(ALTS(ONE,CHAR(a)),SEQ(ONE,CHAR(b))),ALTS(CHAR(a),ALTS(CHAR(b),CHAR(a)))))
    //new ce2 : ALTS(CHAR(b),SEQ(ALTS(ZERO,ALTS(CHAR(b),CHAR(b))),ALTS(ALTS(CHAR(a),CHAR(b)),SEQ(CHAR(c),ONE))))
    //new ce3 : SEQ(CHAR(b),ALTS(ALTS(ALTS(ONE,CHAR(a)),SEQ(CHAR(c),ONE)),SEQ(STAR(ZERO),SEQ(ONE,CHAR(b)))))
    //circular double-nullable STAR(SEQ((STAR("b") | "a" | "b"), ("b" | ONE)))
    def counterexample_check() {
      val r : Rexp = SEQ(ALTS(ONE, 
        SEQ(ALTS(CHAR('f'), CHAR('b')), CHAR('g'))), SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e'))))//(1+(f +b)·g)·(a·(d·e))//STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
      val acc : Set[Rexp] = Set(SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e'))), SEQ(SEQ(CHAR('f'), CHAR('g')), SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e')))) )
      val a = internalise(r)
      aprint(prune(a, acc));
        //ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
      //ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
      // val s = "ab"
      // val bdStrong5 = bdersStrong6(s.toList, internalise(r))
      // val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
      // val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
      // val apdersSet = pdersSet.map(internalise)
      // println("original regex ")
      // rprint(r)
      // println("after strong bsimp")
      // aprint(bdStrong5)
      // println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   ")
      // rsprint(bdStrong5Set)
      // println("after pderUNIV")
      // rsprint(pdersSet.toList)
      // println("pderUNIV distinctBy6")
      // //asprint(distinctBy6(apdersSet.toList))
      // rsprint((pdersSet.toList).flatMap(turnIntoTerms))
      // rsprint(turnIntoTerms(pdersSet.toList(3)))
      // println("NO 3 not into terms")
      // rprint((pdersSet.toList()))
      // println("after pderUNIV broken")
      // rsprint(pdersSet.flatMap(r => turnIntoTerms(r)).toList)

    }

    // counterexample_check()

    def lf_display() {
      val r = STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
        ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
      //ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
      val s = "ab"
      val bdStrong5 = bdersStrong6(s.toList, internalise(r))
      val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
      val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
      val apdersSet = pdersSet.map(internalise)
      lfprint(lf(r))

    }

    // lf_display()

    def breakable(r: Rexp) : Boolean = r match {
      case SEQ(ALTS(_, _), _) => true
      case SEQ(r1, r2) => breakable(r1)
      case _ => false
    }

    def linform_test() {
      val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
      val r_linforms = lf(r)
      println(r_linforms.size)
      val pderuniv = pderUNIV(r)
      println(pderuniv.size)
    }
    // linform_test()
    // 1
    def newStrong_test() {
      val r2 = (CHAR('b') | ONE)
      val r0 = CHAR('d')
      val r1 = (ONE | CHAR('c'))
      val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
      println(s"original regex is: ")
      rprint(expRexp)
      val expSimp5 = strongBsimp5(internalise(expRexp))
      val expSimp6 = strongBsimp6(internalise(expRexp))
      aprint(expSimp5)
      aprint(expSimp6)
    }
    // newStrong_test()
    // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
    // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
    // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
    // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))


    // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
    // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))





    def bders_bderssimp() {

      test(single(SEQ(ALTS(ONE,CHAR('c')),
        SEQ(SEQ(CHAR('a'),CHAR('a')),ALTS(ONE,CHAR('c'))))), 1) { (r: Rexp) => 
          // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
          val ss = Set("aa")//stringsFromRexp(r)
          val boolList = ss.map(s => {
            //val bdStrong = bdersStrong(s.toList, internalise(r))
            val bds = bsimp(bders(s.toList, internalise(r)))
            val bdssimp = bsimp(bders_simp(s.toList, internalise(r)))
            //val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
            //val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
            //rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList))
            //bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
            //rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken)//|| bdStrong6Set.size <= pdersSetBroken.size
            rprint(r)
            println(bds)
            println(bdssimp)
            bds == bdssimp
          })
          //!boolList.exists(b => b == false)
          !boolList.exists(b => b == false)
          }
        }
        //  bders_bderssimp()



println("hi!!!!!")
counterexample_check()