RegexExplosionPlot/evilForBsimp.sc
changeset 517 3c5e58d08939
child 518 ff7945a988a3
equal deleted inserted replaced
516:6fecb7fe8cd0 517:3c5e58d08939
       
     1 // A simple lexer inspired by work of Sulzmann & Lu
       
     2 //==================================================
       
     3 //
       
     4 // Call the test cases with 
       
     5 //
       
     6 //   amm lexer.sc small
       
     7 //   amm lexer.sc fib
       
     8 //   amm lexer.sc loops
       
     9 //   amm lexer.sc email
       
    10 //
       
    11 //   amm lexer.sc all
       
    12 
       
    13 
       
    14 
       
    15 // regular expressions including records
       
    16 abstract class Rexp 
       
    17 case object ZERO extends Rexp
       
    18 case object ONE extends Rexp
       
    19 case object ANYCHAR extends Rexp
       
    20 case class CHAR(c: Char) extends Rexp
       
    21 case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
       
    22 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    23 case class STAR(r: Rexp) extends Rexp 
       
    24 case class RECD(x: String, r: Rexp) extends Rexp  
       
    25 case class NTIMES(n: Int, r: Rexp) extends Rexp
       
    26 case class OPTIONAL(r: Rexp) extends Rexp
       
    27 case class NOT(r: Rexp) extends Rexp
       
    28                 // records for extracting strings or tokens
       
    29   
       
    30 // values  
       
    31 abstract class Val
       
    32 case object Failure extends Val
       
    33 case object Empty extends Val
       
    34 case class Chr(c: Char) extends Val
       
    35 case class Sequ(v1: Val, v2: Val) extends Val
       
    36 case class Left(v: Val) extends Val
       
    37 case class Right(v: Val) extends Val
       
    38 case class Stars(vs: List[Val]) extends Val
       
    39 case class Rec(x: String, v: Val) extends Val
       
    40 case class Ntime(vs: List[Val]) extends Val
       
    41 case class Optionall(v: Val) extends Val
       
    42 case class Nots(s: String) extends Val
       
    43 
       
    44 
       
    45 
       
    46 abstract class Bit
       
    47 case object Z extends Bit
       
    48 case object S extends Bit
       
    49 
       
    50 
       
    51 type Bits = List[Bit]
       
    52 
       
    53 abstract class ARexp 
       
    54 case object AZERO extends ARexp
       
    55 case class AONE(bs: Bits) extends ARexp
       
    56 case class ACHAR(bs: Bits, c: Char) extends ARexp
       
    57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
       
    58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
       
    59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
       
    60 case class ANOT(bs: Bits, r: ARexp) extends ARexp
       
    61 case class AANYCHAR(bs: Bits) extends ARexp
       
    62 
       
    63 import scala.util.Try
       
    64 
       
    65 trait Generator[+T] {
       
    66     self => // an alias for "this"
       
    67     def generate(): T
       
    68   
       
    69     def gen(n: Int) : List[T] = 
       
    70       if (n == 0) Nil else self.generate() :: gen(n - 1)
       
    71     
       
    72     def map[S](f: T => S): Generator[S] = new Generator[S] {
       
    73       def generate = f(self.generate())  
       
    74     }
       
    75     def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
       
    76       def generate = f(self.generate()).generate()
       
    77     }
       
    78 
       
    79 
       
    80 }
       
    81 
       
    82   // tests a property according to a given random generator
       
    83   def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
       
    84     for (_ <- 0 until amount) {
       
    85       val value = r.generate()
       
    86       assert(pred(value), s"Test failed for: $value")
       
    87     }
       
    88     println(s"Test passed $amount times")
       
    89   }
       
    90   def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
       
    91     for (_ <- 0 until amount) {
       
    92       val valueR = r.generate()
       
    93       val valueS = s.generate()
       
    94       assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
       
    95     }
       
    96     println(s"Test passed $amount times")
       
    97   }
       
    98 
       
    99 // random integers
       
   100 val integers = new Generator[Int] {
       
   101   val rand = new java.util.Random
       
   102   def generate() = rand.nextInt()
       
   103 }
       
   104 
       
   105 // random booleans
       
   106 val booleans = integers.map(_ > 0)
       
   107   
       
   108 // random integers in the range lo and high  
       
   109 def range(lo: Int, hi: Int): Generator[Int] = 
       
   110   for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
       
   111 
       
   112 // random characters
       
   113 def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)  
       
   114 val chars = chars_range('a', 'z')
       
   115 
       
   116 
       
   117 def oneOf[T](xs: T*): Generator[T] = 
       
   118   for (idx <- range(0, xs.length)) yield xs(idx)
       
   119   
       
   120 def single[T](x: T) = new Generator[T] {
       
   121   def generate() = x
       
   122 }   
       
   123 
       
   124 def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] = 
       
   125   for (x <- t; y <- u) yield (x, y)
       
   126 
       
   127 // lists
       
   128 def emptyLists = single(Nil) 
       
   129 
       
   130 def nonEmptyLists : Generator[List[Int]] = 
       
   131   for (head <- integers; tail <- lists) yield head :: tail
       
   132 
       
   133 def lists: Generator[List[Int]] = for {
       
   134   kind <- booleans
       
   135   list <- if (kind) emptyLists else nonEmptyLists
       
   136 } yield list
       
   137 
       
   138 def char_list(len: Int): Generator[List[Char]] = {
       
   139   if(len <= 0) single(Nil)
       
   140   else{
       
   141     for { 
       
   142       c <- chars_range('a', 'c')
       
   143       tail <- char_list(len - 1)
       
   144     } yield c :: tail
       
   145   }
       
   146 }
       
   147 
       
   148 def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
       
   149 
       
   150 def sampleString(r: Rexp) : List[String] = r match {
       
   151   case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
       
   152   case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
       
   153   case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
       
   154   case ONE => "" :: Nil
       
   155   case ZERO => Nil
       
   156   case CHAR(c) => c.toString :: Nil
       
   157 
       
   158 }
       
   159 
       
   160 def stringsFromRexp(r: Rexp) : List[String] = 
       
   161   breakIntoTerms(r).flatMap(r => sampleString(r))
       
   162 
       
   163 
       
   164 // (simple) binary trees
       
   165 trait Tree[T]
       
   166 case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
       
   167 case class Leaf[T](x: T) extends Tree[T]
       
   168 
       
   169 def leafs[T](t: Generator[T]): Generator[Leaf[T]] = 
       
   170   for (x <- t) yield Leaf(x)
       
   171 
       
   172 def inners[T](t: Generator[T]): Generator[Inner[T]] = 
       
   173   for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
       
   174 
       
   175 def trees[T](t: Generator[T]): Generator[Tree[T]] = 
       
   176   for (kind <- range(0, 2);  
       
   177        tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
       
   178 
       
   179 // regular expressions
       
   180 
       
   181 // generates random leaf-regexes; prefers CHAR-regexes
       
   182 def leaf_rexp() : Generator[Rexp] =
       
   183   for (kind <- range(0, 5);
       
   184        c <- chars_range('a', 'd')) yield
       
   185     kind match {
       
   186       case 0 => ZERO
       
   187       case 1 => ONE
       
   188       case _ => CHAR(c) 
       
   189     }
       
   190 
       
   191 // generates random inner regexes with maximum depth d
       
   192 def inner_rexp(d: Int) : Generator[Rexp] =
       
   193   for (kind <- range(0, 3);
       
   194        l <- rexp(d); 
       
   195        r <- rexp(d))
       
   196   yield kind match {
       
   197     case 0 => ALTS(l, r)
       
   198     case 1 => SEQ(l, r)
       
   199     case 2 => STAR(r)
       
   200   }
       
   201 
       
   202 // generates random regexes with maximum depth d;
       
   203 // prefers inner regexes in 2/3 of the cases
       
   204 def rexp(d: Int = 100): Generator[Rexp] = 
       
   205   for (kind <- range(0, 3);
       
   206        r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
       
   207 
       
   208 
       
   209 // some test functions for rexps
       
   210 def height(r: Rexp) : Int = r match {
       
   211   case ZERO => 1
       
   212   case ONE => 1
       
   213   case CHAR(_) => 1
       
   214   case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
       
   215   case SEQ(r1, r2) =>  1 + List(height(r1), height(r2)).max
       
   216   case STAR(r) => 1 + height(r)
       
   217 }
       
   218 
       
   219 // def size(r: Rexp) : Int = r match {
       
   220 //   case ZERO => 1
       
   221 //   case ONE => 1
       
   222 //   case CHAR(_) => 1
       
   223 //   case ALTS(r1, r2) => 1 + size(r1) + size(r2)
       
   224 //   case SEQ(r1, r2) =>  1 + size(r1) + size(r2)
       
   225 //   case STAR(r) => 1 + size(r) 
       
   226 // }
       
   227 
       
   228 // randomly subtracts 1 or 2 from the STAR case
       
   229 def size_faulty(r: Rexp) : Int = r match {
       
   230   case ZERO => 1
       
   231   case ONE => 1
       
   232   case CHAR(_) => 1
       
   233   case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
       
   234   case SEQ(r1, r2) =>  1 + size_faulty(r1) + size_faulty(r2)
       
   235   case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
       
   236 }
       
   237 
       
   238 
       
   239    
       
   240 // some convenience for typing in regular expressions
       
   241 
       
   242 def charlist2rexp(s : List[Char]): Rexp = s match {
       
   243   case Nil => ONE
       
   244   case c::Nil => CHAR(c)
       
   245   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
   246 }
       
   247 implicit def string2rexp(s : String) : Rexp = 
       
   248   charlist2rexp(s.toList)
       
   249 
       
   250 implicit def RexpOps(r: Rexp) = new {
       
   251   def | (s: Rexp) = ALTS(r, s)
       
   252   def % = STAR(r)
       
   253   def ~ (s: Rexp) = SEQ(r, s)
       
   254 }
       
   255 
       
   256 implicit def stringOps(s: String) = new {
       
   257   def | (r: Rexp) = ALTS(s, r)
       
   258   def | (r: String) = ALTS(s, r)
       
   259   def % = STAR(s)
       
   260   def ~ (r: Rexp) = SEQ(s, r)
       
   261   def ~ (r: String) = SEQ(s, r)
       
   262   def $ (r: Rexp) = RECD(s, r)
       
   263 }
       
   264 
       
   265 def nullable(r: Rexp) : Boolean = r match {
       
   266   case ZERO => false
       
   267   case ONE => true
       
   268   case CHAR(_) => false
       
   269   case ANYCHAR => false
       
   270   case ALTS(r1, r2) => nullable(r1) || nullable(r2)
       
   271   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
   272   case STAR(_) => true
       
   273   case RECD(_, r1) => nullable(r1)
       
   274   case NTIMES(n, r) => if (n == 0) true else nullable(r)
       
   275   case OPTIONAL(r) => true
       
   276   case NOT(r) => !nullable(r)
       
   277 }
       
   278 
       
   279 def der(c: Char, r: Rexp) : Rexp = r match {
       
   280   case ZERO => ZERO
       
   281   case ONE => ZERO
       
   282   case CHAR(d) => if (c == d) ONE else ZERO
       
   283   case ANYCHAR => ONE 
       
   284   case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
       
   285   case SEQ(r1, r2) => 
       
   286     if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
       
   287     else SEQ(der(c, r1), r2)
       
   288   case STAR(r) => SEQ(der(c, r), STAR(r))
       
   289   case RECD(_, r1) => der(c, r1)
       
   290   case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
       
   291   case OPTIONAL(r) => der(c, r)
       
   292   case NOT(r) =>  NOT(der(c, r))
       
   293 }
       
   294 
       
   295 
       
   296 // extracts a string from a value
       
   297 def flatten(v: Val) : String = v match {
       
   298   case Empty => ""
       
   299   case Chr(c) => c.toString
       
   300   case Left(v) => flatten(v)
       
   301   case Right(v) => flatten(v)
       
   302   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
       
   303   case Stars(vs) => vs.map(flatten).mkString
       
   304   case Ntime(vs) => vs.map(flatten).mkString
       
   305   case Optionall(v) => flatten(v)
       
   306   case Rec(_, v) => flatten(v)
       
   307 }
       
   308 
       
   309 
       
   310 // extracts an environment from a value;
       
   311 // used for tokenising a string
       
   312 def env(v: Val) : List[(String, String)] = v match {
       
   313   case Empty => Nil
       
   314   case Chr(c) => Nil
       
   315   case Left(v) => env(v)
       
   316   case Right(v) => env(v)
       
   317   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
   318   case Stars(vs) => vs.flatMap(env)
       
   319   case Ntime(vs) => vs.flatMap(env)
       
   320   case Rec(x, v) => (x, flatten(v))::env(v)
       
   321   case Optionall(v) => env(v)
       
   322   case Nots(s) => ("Negative", s) :: Nil
       
   323 }
       
   324 
       
   325 
       
   326 // The injection and mkeps part of the lexer
       
   327 //===========================================
       
   328 
       
   329 def mkeps(r: Rexp) : Val = r match {
       
   330   case ONE => Empty
       
   331   case ALTS(r1, r2) => 
       
   332     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   333   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   334   case STAR(r) => Stars(Nil)
       
   335   case RECD(x, r) => Rec(x, mkeps(r))
       
   336   case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
       
   337   case OPTIONAL(r) => Optionall(Empty)
       
   338   case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")  
       
   339                          else Nots("")//Nots(s.reverse.toString)
       
   340 //   case NOT(ZERO) => Empty
       
   341 //   case NOT(CHAR(c)) => Empty
       
   342 //   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
       
   343 //   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
       
   344 //   case NOT(STAR(r)) => Stars(Nil) 
       
   345 
       
   346 }
       
   347 
       
   348 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   349   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   350   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   351   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   352   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   353   case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   354   case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   355   case (CHAR(d), Empty) => Chr(c) 
       
   356   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   357   case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
       
   358   case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
       
   359   case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
       
   360   case (ANYCHAR, Empty) => Chr(c)
       
   361 }
       
   362 
       
   363 // some "rectification" functions for simplification
       
   364 
       
   365 
       
   366 
       
   367 
       
   368 // The Lexing Rules for the WHILE Language
       
   369 
       
   370   // bnullable function: tests whether the aregular 
       
   371   // expression can recognise the empty string
       
   372 def bnullable (r: ARexp) : Boolean = r match {
       
   373     case AZERO => false
       
   374     case AONE(_) => true
       
   375     case ACHAR(_,_) => false
       
   376     case AALTS(_, rs) => rs.exists(bnullable)
       
   377     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
       
   378     case ASTAR(_, _) => true
       
   379     case ANOT(_, rn) => !bnullable(rn)
       
   380   }
       
   381 
       
   382 def mkepsBC(r: ARexp) : Bits = r match {
       
   383     case AONE(bs) => bs
       
   384     case AALTS(bs, rs) => {
       
   385       val n = rs.indexWhere(bnullable)
       
   386       bs ++ mkepsBC(rs(n))
       
   387     }
       
   388     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
       
   389     case ASTAR(bs, r) => bs ++ List(Z)
       
   390     case ANOT(bs, rn) => bs
       
   391   }
       
   392 
       
   393 
       
   394 def bder(c: Char, r: ARexp) : ARexp = r match {
       
   395     case AZERO => AZERO
       
   396     case AONE(_) => AZERO
       
   397     case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
       
   398     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
       
   399     case ASEQ(bs, r1, r2) => 
       
   400       if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
       
   401       else ASEQ(bs, bder(c, r1), r2)
       
   402     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
       
   403     case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
       
   404     case AANYCHAR(bs) => AONE(bs)
       
   405   } 
       
   406 
       
   407 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
       
   408     case AZERO => AZERO
       
   409     case AONE(cs) => AONE(bs ++ cs)
       
   410     case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
       
   411     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
       
   412     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
       
   413     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
       
   414     case ANOT(cs, r) => ANOT(bs ++ cs, r)
       
   415   }
       
   416 
       
   417 
       
   418 def internalise(r: Rexp) : ARexp = r match {
       
   419     case ZERO => AZERO
       
   420     case ONE => AONE(Nil)
       
   421     case CHAR(c) => ACHAR(Nil, c)
       
   422     //case PRED(f) => APRED(Nil, f)
       
   423     case ALTS(r1, r2) => 
       
   424       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
       
   425     // case ALTS(r1::rs) => {
       
   426     //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
       
   427     //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
       
   428     // }
       
   429     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
       
   430     case STAR(r) => ASTAR(Nil, internalise(r))
       
   431     case RECD(x, r) => internalise(r)
       
   432     case NOT(r) => ANOT(Nil, internalise(r))
       
   433     case ANYCHAR => AANYCHAR(Nil)
       
   434   }
       
   435 
       
   436 
       
   437 def bsimp(r: ARexp): ARexp = 
       
   438   {
       
   439     r match {
       
   440       case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
       
   441           case (AZERO, _) => AZERO
       
   442           case (_, AZERO) => AZERO
       
   443           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   444           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   445       }
       
   446       case AALTS(bs1, rs) => {
       
   447             val rs_simp = rs.map(bsimp(_))
       
   448             val flat_res = flats(rs_simp)
       
   449             val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
       
   450             dist_res match {
       
   451               case Nil => AZERO
       
   452               case s :: Nil => fuse(bs1, s)
       
   453               case rs => AALTS(bs1, rs)  
       
   454             }
       
   455           
       
   456       }
       
   457       case r => r
       
   458     }
       
   459 }
       
   460 def strongBsimp(r: ARexp): ARexp =
       
   461 {
       
   462   r match {
       
   463     case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
       
   464         case (AZERO, _) => AZERO
       
   465         case (_, AZERO) => AZERO
       
   466         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   467         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   468     }
       
   469     case AALTS(bs1, rs) => {
       
   470           val rs_simp = rs.map(strongBsimp(_))
       
   471           val flat_res = flats(rs_simp)
       
   472           val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
       
   473           dist_res match {
       
   474             case Nil => AZERO
       
   475             case s :: Nil => fuse(bs1, s)
       
   476             case rs => AALTS(bs1, rs)  
       
   477           }
       
   478         
       
   479     }
       
   480     case r => r
       
   481   }
       
   482 }
       
   483 
       
   484 def strongBsimp5(r: ARexp): ARexp =
       
   485 {
       
   486   // println("was this called?")
       
   487   r match {
       
   488     case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
       
   489         case (AZERO, _) => AZERO
       
   490         case (_, AZERO) => AZERO
       
   491         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   492         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   493     }
       
   494     case AALTS(bs1, rs) => {
       
   495         // println("alts case")
       
   496           val rs_simp = rs.map(strongBsimp5(_))
       
   497           val flat_res = flats(rs_simp)
       
   498           var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
       
   499           var dist2_res = distinctBy5(dist_res)
       
   500           while(dist_res != dist2_res){
       
   501             dist_res = dist2_res
       
   502             dist2_res = distinctBy5(dist_res)
       
   503           }
       
   504           (dist2_res) match {
       
   505             case Nil => AZERO
       
   506             case s :: Nil => fuse(bs1, s)
       
   507             case rs => AALTS(bs1, rs)  
       
   508           }
       
   509     }
       
   510     case r => r
       
   511   }
       
   512 }
       
   513 
       
   514 def bders (s: List[Char], r: ARexp) : ARexp = s match {
       
   515   case Nil => r
       
   516   case c::s => bders(s, bder(c, r))
       
   517 }
       
   518 
       
   519 def flats(rs: List[ARexp]): List[ARexp] = rs match {
       
   520     case Nil => Nil
       
   521     case AZERO :: rs1 => flats(rs1)
       
   522     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       
   523     case r1 :: rs2 => r1 :: flats(rs2)
       
   524   }
       
   525 
       
   526 def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
       
   527   case Nil => Nil
       
   528   case (x::xs) => {
       
   529     val res = f(x)
       
   530     if (acc.contains(res)) distinctBy(xs, f, acc)  
       
   531     else x::distinctBy(xs, f, res::acc)
       
   532   }
       
   533 } 
       
   534 
       
   535 
       
   536 def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
       
   537   r match {
       
   538     case ASEQ(bs, r1, r2) => 
       
   539       val termsTruncated = allowableTerms.collect(rt => rt match {
       
   540         case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2)) 
       
   541       })
       
   542       val pruned : ARexp = pruneRexp(r1, termsTruncated)
       
   543       pruned match {
       
   544         case AZERO => AZERO
       
   545         case AONE(bs1) => fuse(bs ++ bs1, r2)
       
   546         case pruned1 => ASEQ(bs, pruned1, r2)
       
   547       }
       
   548 
       
   549     case AALTS(bs, rs) => 
       
   550       //allowableTerms.foreach(a => println(shortRexpOutput(a)))        
       
   551       val rsp = rs.map(r => 
       
   552                     pruneRexp(r, allowableTerms)
       
   553                   )
       
   554                   .filter(r => r != AZERO)
       
   555       rsp match {
       
   556         case Nil => AZERO
       
   557         case r1::Nil => fuse(bs, r1)
       
   558         case rs1 => AALTS(bs, rs1)
       
   559       }
       
   560     case r => 
       
   561       if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
       
   562   }
       
   563 }
       
   564 
       
   565 def oneSimp(r: Rexp) : Rexp = r match {
       
   566   case SEQ(ONE, r) => r
       
   567   case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
       
   568   case r => r//assert r != 0 
       
   569     
       
   570 }
       
   571 
       
   572 
       
   573 def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
       
   574   case Nil => Nil
       
   575   case x :: xs =>
       
   576     //assert(acc.distinct == acc)
       
   577     val erased = erase(x)
       
   578     if(acc.contains(erased))
       
   579       distinctBy4(xs, acc)
       
   580     else{
       
   581       val addToAcc =  breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
       
   582       //val xp = pruneRexp(x, addToAcc)
       
   583       pruneRexp(x, addToAcc) match {
       
   584         case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   585         case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   586       }
       
   587     }
       
   588 }
       
   589 
       
   590 // fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
       
   591 //   where
       
   592 // "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
       
   593 //                           case r of (ASEQ bs r1 r2) ⇒ 
       
   594 //                             bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ  (erase r2) ( ctx) )) r2   |
       
   595 //                                     (AALTs bs rs) ⇒ 
       
   596 //                             bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) )    |
       
   597 //                                     r             ⇒ r
       
   598 
       
   599 // def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
       
   600 //   case r::Nil => SEQ(r, acc)
       
   601 //   case Nil => acc
       
   602 //   case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
       
   603 // }
       
   604 
       
   605 
       
   606 //the "fake" Language interpretation: just concatenates!
       
   607 def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
       
   608   case Nil => acc
       
   609   case r :: rs1 => 
       
   610     // if(acc == ONE) 
       
   611     //   L(r, rs1) 
       
   612     // else
       
   613       L(SEQ(acc, r), rs1)
       
   614 }
       
   615 
       
   616 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
       
   617 def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
       
   618 def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
       
   619 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
       
   620 
       
   621 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
       
   622   // println("pakc")
       
   623   // println(shortRexpOutput(erase(r)))
       
   624   // println("acc")
       
   625   // rsprint(acc)
       
   626   // println("ctx---------")
       
   627   // rsprint(ctx)
       
   628   // println("ctx---------end")
       
   629   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
       
   630 
       
   631   if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
       
   632     AZERO
       
   633   }
       
   634   else{
       
   635     r match {
       
   636       case ASEQ(bs, r1, r2) => 
       
   637       (pAKC(acc, r1, erase(r2) :: ctx)) match{
       
   638         case AZERO => 
       
   639           AZERO
       
   640         case AONE(bs1) => 
       
   641           fuse(bs1, r2)
       
   642         case r1p => 
       
   643           ASEQ(bs, r1p, r2)
       
   644       }
       
   645       case AALTS(bs, rs0) => 
       
   646         // println("before pruning")
       
   647         // println(s"ctx is ")
       
   648         // ctx.foreach(r => println(shortRexpOutput(r)))
       
   649         // println(s"rs0 is ")
       
   650         // rs0.foreach(r => println(shortRexpOutput(erase(r))))
       
   651         // println(s"acc is ")
       
   652         // acc.foreach(r => println(shortRexpOutput(r)))
       
   653         rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
       
   654           case Nil => 
       
   655             // println("after pruning Nil")
       
   656             AZERO
       
   657           case r :: Nil => 
       
   658             // println("after pruning singleton")
       
   659             // println(shortRexpOutput(erase(r)))
       
   660             r 
       
   661           case rs0p => 
       
   662           // println("after pruning non-singleton")
       
   663             AALTS(bs, rs0p)
       
   664         }
       
   665       case r => r
       
   666     }
       
   667   }
       
   668 }
       
   669 
       
   670 def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
       
   671   case Nil => 
       
   672     Nil
       
   673   case x :: xs => {
       
   674     val erased = erase(x)
       
   675     if(acc.contains(erased)){
       
   676       distinctBy5(xs, acc)
       
   677     }
       
   678     else{
       
   679       val pruned = pAKC(acc, x, Nil)
       
   680       val newTerms = breakIntoTerms(erase(pruned))
       
   681       pruned match {
       
   682         case AZERO => 
       
   683           distinctBy5(xs, acc)
       
   684         case xPrime => 
       
   685           xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   686       }
       
   687     }
       
   688   }
       
   689 }
       
   690 
       
   691 
       
   692 def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
       
   693   case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
       
   694   case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
       
   695   case ZERO => Nil
       
   696   case _ => r::Nil
       
   697 }
       
   698 
       
   699 
       
   700 
       
   701 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
       
   702   case (ONE, bs) => (Empty, bs)
       
   703   case (CHAR(f), bs) => (Chr(f), bs)
       
   704   case (ALTS(r1, r2), Z::bs1) => {
       
   705       val (v, bs2) = decode_aux(r1, bs1)
       
   706       (Left(v), bs2)
       
   707   }
       
   708   case (ALTS(r1, r2), S::bs1) => {
       
   709       val (v, bs2) = decode_aux(r2, bs1)
       
   710       (Right(v), bs2)
       
   711   }
       
   712   case (SEQ(r1, r2), bs) => {
       
   713     val (v1, bs1) = decode_aux(r1, bs)
       
   714     val (v2, bs2) = decode_aux(r2, bs1)
       
   715     (Sequ(v1, v2), bs2)
       
   716   }
       
   717   case (STAR(r1), S::bs) => {
       
   718     val (v, bs1) = decode_aux(r1, bs)
       
   719     //(v)
       
   720     val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
       
   721     (Stars(v::vs), bs2)
       
   722   }
       
   723   case (STAR(_), Z::bs) => (Stars(Nil), bs)
       
   724   case (RECD(x, r1), bs) => {
       
   725     val (v, bs1) = decode_aux(r1, bs)
       
   726     (Rec(x, v), bs1)
       
   727   }
       
   728   case (NOT(r), bs) => (Nots(r.toString), bs)
       
   729 }
       
   730 
       
   731 def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
       
   732   case (v, Nil) => v
       
   733   case _ => throw new Exception("Not decodable")
       
   734 }
       
   735 
       
   736 
       
   737 
       
   738 def blexing_simp(r: Rexp, s: String) : Val = {
       
   739     val bit_code = blex_simp(internalise(r), s.toList)
       
   740     decode(r, bit_code)
       
   741 }
       
   742 def simpBlexer(r: Rexp, s: String) : Val = {
       
   743   Try(blexing_simp(r, s)).getOrElse(Failure)
       
   744 }
       
   745 
       
   746 def strong_blexing_simp(r: Rexp, s: String) : Val = {
       
   747   decode(r, strong_blex_simp(internalise(r), s.toList))
       
   748 }
       
   749 
       
   750 def strong_blexing_simp5(r: Rexp, s: String) : Val = {
       
   751   decode(r, strong_blex_simp5(internalise(r), s.toList))
       
   752 }
       
   753 
       
   754 
       
   755 def strongBlexer(r: Rexp, s: String) : Val = {
       
   756   Try(strong_blexing_simp(r, s)).getOrElse(Failure)
       
   757 }
       
   758 
       
   759 def strongBlexer5(r: Rexp, s: String): Val = {
       
   760   Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
       
   761 }
       
   762 
       
   763 def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   764   case Nil => {
       
   765     if (bnullable(r)) {
       
   766       //println(asize(r))
       
   767       val bits = mkepsBC(r)
       
   768       bits
       
   769     }
       
   770     else 
       
   771       throw new Exception("Not matched")
       
   772   }
       
   773   case c::cs => {
       
   774     val der_res = bder(c,r)
       
   775     val simp_res = strongBsimp(der_res)  
       
   776     strong_blex_simp(simp_res, cs)      
       
   777   }
       
   778 }
       
   779 
       
   780 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
       
   781   case Nil => {
       
   782     if (bnullable(r)) {
       
   783       //println(asize(r))
       
   784       val bits = mkepsBC(r)
       
   785       bits
       
   786     }
       
   787     else 
       
   788       throw new Exception("Not matched")
       
   789   }
       
   790   case c::cs => {
       
   791     val der_res = bder(c,r)
       
   792     val simp_res = strongBsimp5(der_res)  
       
   793     strong_blex_simp5(simp_res, cs)      
       
   794   }
       
   795 }
       
   796 
       
   797 
       
   798   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
       
   799     case Nil => r
       
   800     case c::s => 
       
   801       //println(erase(r))
       
   802       bders_simp(s, bsimp(bder(c, r)))
       
   803   }
       
   804   
       
   805   def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
       
   806     case Nil => r
       
   807     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
       
   808   }
       
   809 
       
   810   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
       
   811 
       
   812   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
       
   813     case Nil => r 
       
   814     case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
       
   815   }
       
   816 
       
   817   def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
       
   818 
       
   819   def erase(r:ARexp): Rexp = r match{
       
   820     case AZERO => ZERO
       
   821     case AONE(_) => ONE
       
   822     case ACHAR(bs, c) => CHAR(c)
       
   823     case AALTS(bs, Nil) => ZERO
       
   824     case AALTS(bs, a::Nil) => erase(a)
       
   825     case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
       
   826     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
       
   827     case ASTAR(cs, r)=> STAR(erase(r))
       
   828     case ANOT(bs, r) => NOT(erase(r))
       
   829     case AANYCHAR(bs) => ANYCHAR
       
   830   }
       
   831 
       
   832 
       
   833   def allCharSeq(r: Rexp) : Boolean = r match {
       
   834     case CHAR(c) => true
       
   835     case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
       
   836     case _ => false
       
   837   }
       
   838 
       
   839   def flattenSeq(r: Rexp) : String = r match {
       
   840     case CHAR(c) => c.toString
       
   841     case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
       
   842     case _ => throw new Error("flatten unflattenable rexp")
       
   843   } 
       
   844 
       
   845   def shortRexpOutput(r: Rexp) : String = r match {
       
   846       case CHAR(c) => c.toString
       
   847       case ONE => "1"
       
   848       case ZERO => "0"
       
   849       case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
       
   850       case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
       
   851       case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
       
   852       case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
       
   853       case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
       
   854       //case RTOP => "RTOP"
       
   855     }
       
   856 
       
   857 
       
   858   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   859       case Nil => {
       
   860         if (bnullable(r)) {
       
   861           val bits = mkepsBC(r)
       
   862           bits
       
   863         }
       
   864         else 
       
   865           throw new Exception("Not matched")
       
   866       }
       
   867       case c::cs => {
       
   868         val der_res = bder(c,r)
       
   869         val simp_res = bsimp(der_res)  
       
   870         blex_simp(simp_res, cs)      
       
   871       }
       
   872   }
       
   873 
       
   874 
       
   875 
       
   876 
       
   877     def size(r: Rexp) : Int = r match {
       
   878       case ZERO => 1
       
   879       case ONE => 1
       
   880       case CHAR(_) => 1
       
   881       case ANYCHAR => 1
       
   882       case NOT(r0) => 1 + size(r0)
       
   883       case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
   884       case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
       
   885       case STAR(r) => 1 + size(r)
       
   886     }
       
   887 
       
   888     def asize(a: ARexp) = size(erase(a))
       
   889 
       
   890 //pder related
       
   891 type Mon = (Char, Rexp)
       
   892 type Lin = Set[Mon]
       
   893 
       
   894 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
       
   895     case ZERO => Set()
       
   896     case ONE => rs
       
   897     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
       
   898   }
       
   899   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
       
   900     case ZERO => Set()
       
   901     case ONE => l
       
   902     case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
       
   903   }
       
   904   def lf(r: Rexp): Lin = r match {
       
   905     case ZERO => Set()
       
   906     case ONE => Set()
       
   907     case CHAR(f) => {
       
   908       //val Some(c) = alphabet.find(f) 
       
   909       Set((f, ONE))
       
   910     }
       
   911     case ALTS(r1, r2) => {
       
   912       lf(r1 ) ++ lf(r2)
       
   913     }
       
   914     case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
       
   915     case SEQ(r1, r2) =>{
       
   916       if (nullable(r1))
       
   917         cir_prod(lf(r1), r2) ++ lf(r2)
       
   918       else
       
   919         cir_prod(lf(r1), r2) 
       
   920     }
       
   921   }
       
   922   def lfs(r: Set[Rexp]): Lin = {
       
   923     r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
       
   924   }
       
   925 
       
   926   def pder(x: Char, t: Rexp): Set[Rexp] = {
       
   927     val lft = lf(t)
       
   928     (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
       
   929   }
       
   930   def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
       
   931     case x::xs => pders(xs, pder(x, t))
       
   932     case Nil => Set(t)
       
   933   }
       
   934   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
       
   935     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
   936     case Nil => ts 
       
   937   }
       
   938   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
       
   939   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
       
   940   //all implementation of partial derivatives that involve set union are potentially buggy
       
   941   //because they don't include the original regular term before they are pdered.
       
   942   //now only pderas is fixed.
       
   943   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
       
   944   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
       
   945   def awidth(r: Rexp) : Int = r match {
       
   946     case CHAR(c) => 1
       
   947     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
       
   948     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
       
   949     case ONE => 0
       
   950     case ZERO => 0
       
   951     case STAR(r) => awidth(r)
       
   952   }
       
   953   //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
       
   954   //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
       
   955   def o(r: Rexp) = if (nullable(r)) ONE else ZERO
       
   956   //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
       
   957   def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
       
   958     case ZERO => Set[Rexp]()
       
   959     case ONE => Set[Rexp]()
       
   960     case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
       
   961     case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
       
   962     case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
       
   963     case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
       
   964   }
       
   965   def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
       
   966     case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
   967     case Nil => ts   
       
   968   }
       
   969   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
       
   970 
       
   971 
       
   972 
       
   973 def starPrint(r: Rexp) : Unit = r match {
       
   974         
       
   975           case SEQ(head, rstar) =>
       
   976             println(shortRexpOutput(head) ++ "~STARREG")
       
   977           case STAR(rstar) =>
       
   978             println("STARREG")
       
   979           case ALTS(r1, r2) =>  
       
   980             println("(")
       
   981             starPrint(r1)
       
   982             println("+")
       
   983             starPrint(r2)
       
   984             println(")")
       
   985           case ZERO => println("0")
       
   986       }
       
   987 
       
   988 // @arg(doc = "small tests")
       
   989 def n_astar_list(d: Int) : Rexp = {
       
   990   if(d == 0) STAR("a") 
       
   991   else ALTS(STAR("a" * d), n_astar_list(d - 1))
       
   992 }
       
   993 def n_astar_alts(d: Int) : Rexp = d match {
       
   994   case 0 => n_astar_list(0)
       
   995   case d => STAR(n_astar_list(d))
       
   996   // case r1 :: r2 :: Nil => ALTS(r1, r2)
       
   997   // case r1 :: Nil => r1
       
   998   // case r :: rs => ALTS(r, n_astar_alts(rs))
       
   999   // case Nil => throw new Error("should give at least 1 elem")
       
  1000 }
       
  1001 def n_astar_aux(d: Int) = {
       
  1002   if(d == 0) n_astar_alts(0)
       
  1003   else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
       
  1004 }
       
  1005 
       
  1006 def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
       
  1007 //val STARREG = n_astar(3)
       
  1008 // ( STAR("a") | 
       
  1009 //                  ("a" | "aa").% | 
       
  1010 //                 ( "a" | "aa" | "aaa").% 
       
  1011 //                 ).%
       
  1012                 //( "a" | "aa" | "aaa" | "aaaa").% |
       
  1013                 //( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").% 
       
  1014 (((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
       
  1015 
       
  1016 // @main
       
  1017 
       
  1018 def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
       
  1019   (a, b) => b * a /
       
  1020   Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
       
  1021 }
       
  1022 
       
  1023 def small() = {
       
  1024   //val pderSTAR = pderUNIV(STARREG)
       
  1025 
       
  1026   //val refSize = pderSTAR.map(size(_)).sum
       
  1027   for(n <- 6 to 6){
       
  1028     val STARREG = n_astar(n)
       
  1029     val iMax = (lcm((1 to n).toList))
       
  1030     for(i <- 1 to iMax + 50){// 100, 400, 800, 840, 841, 900 
       
  1031       val prog0 = "a" * i
       
  1032       //println(s"test: $prog0")
       
  1033       print(n)
       
  1034       print(" ")
       
  1035       // print(i)
       
  1036       // print(" ")
       
  1037       println(asize(bders_simp(prog0.toList, internalise(STARREG))))
       
  1038     }
       
  1039   }
       
  1040 }
       
  1041 
       
  1042 def generator_test() {
       
  1043 
       
  1044   // test(rexp(7), 1000) { (r: Rexp) => 
       
  1045   //   val ss = stringsFromRexp(r)
       
  1046   //   val boolList = ss.map(s => {
       
  1047   //     val simpVal = simpBlexer(r, s)
       
  1048   //     val strongVal = strongBlexer(r, s)
       
  1049   //     // println(simpVal)
       
  1050   //     // println(strongVal)
       
  1051   //     (simpVal == strongVal) && (simpVal != None) 
       
  1052   //   })
       
  1053   //   !boolList.exists(b => b == false)
       
  1054   // }
       
  1055   // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => 
       
  1056   //   val ss = stringsFromRexp(r)
       
  1057   //   val boolList = ss.map(s => {
       
  1058   //     val bdStrong = bdersStrong(s.toList, internalise(r))
       
  1059   //     val bdStrong5 = bdersStrong5(s.toList, internalise(r))
       
  1060   //     // println(shortRexpOutput(r))
       
  1061   //     // println(s)
       
  1062   //     // println(strongVal)
       
  1063   //     // println(strongVal5)
       
  1064   //     // (strongVal == strongVal5) 
       
  1065 
       
  1066   //     if(asize(bdStrong5) > asize(bdStrong)){
       
  1067   //       println(s)
       
  1068   //       println(shortRexpOutput(erase(bdStrong5)))
       
  1069   //       println(shortRexpOutput(erase(bdStrong)))
       
  1070   //     }
       
  1071   //     asize(bdStrong5) <= asize(bdStrong)
       
  1072   //   })
       
  1073   //   !boolList.exists(b => b == false)
       
  1074   // }
       
  1075   //*** example where bdStrong5 has a smaller size than bdStrong
       
  1076   // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => 
       
  1077   //*** depth 5 example bdStrong5 larger size than bdStrong
       
  1078   // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
       
  1079  
       
  1080  
       
  1081  
       
  1082   //sanity check from Christian's request
       
  1083   // val r = ("a" | "ab") ~ ("bc" | "c")
       
  1084   // val a = internalise(r)
       
  1085   // val aval = blexing_simp(r, "abc")
       
  1086   // println(aval)
       
  1087 
       
  1088   //sample counterexample:(depth 7)
       
  1089   //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
       
  1090   //(depth5)
       
  1091   //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
       
  1092   test(rexp(4), 10000000) { (r: Rexp) => 
       
  1093   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
       
  1094     val ss = stringsFromRexp(r)
       
  1095     val boolList = ss.map(s => {
       
  1096       val bdStrong = bdersStrong(s.toList, internalise(r))
       
  1097       val bdStrong5 = bdersStrong5(s.toList, internalise(r))
       
  1098       val bdFurther5 = strongBsimp5(bdStrong5)
       
  1099       val sizeFurther5 = asize(bdFurther5)
       
  1100       val pdersSet = pderUNIV(r)
       
  1101       val size5 = asize(bdStrong5)
       
  1102       val size0 = asize(bdStrong)
       
  1103       // println(s)
       
  1104       // println("pdersSet size")
       
  1105       // println(pdersSet.size)
       
  1106       // pdersSet.foreach(r => println(shortRexpOutput(r)))
       
  1107       // println("after bdStrong5")
       
  1108       
       
  1109       // println(shortRexpOutput(erase(bdStrong5)))
       
  1110       // println(breakIntoTerms(erase(bdStrong5)).size)
       
  1111       
       
  1112       // println("after bdStrong")
       
  1113       // println(shortRexpOutput(erase(bdStrong)))
       
  1114       // println(breakIntoTerms(erase(bdStrong)).size)
       
  1115       // println(size5, size0, sizeFurther5)
       
  1116       // aprint(strongBsimp5(bdStrong))
       
  1117       // println(asize(strongBsimp5(bdStrong5)))
       
  1118       // println(s)
       
  1119       size5 <= size0 
       
  1120     })
       
  1121     // println(boolList)
       
  1122     //!boolList.exists(b => b == false)
       
  1123     !boolList.exists(b => b == false)
       
  1124   }
       
  1125 
       
  1126 
       
  1127 }
       
  1128 small()
       
  1129 // generator_test()
       
  1130 
       
  1131 // 1
       
  1132 
       
  1133 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
       
  1134 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
       
  1135 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
       
  1136 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
       
  1137 
       
  1138 
       
  1139 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
       
  1140 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))