RegexExplosionPlot/injToSimp.sc
changeset 666 6da4516ea87d
child 667 660cf698eb26
equal deleted inserted replaced
665:3bedbdce3a3b 666:6da4516ea87d
       
     1 // A simple lexer inspired by work of Sulzmann & Lu
       
     2 //==================================================
       
     3 //
       
     4 // Call the test cases with 
       
     5 //
       
     6 //   amm lexer.sc small
       
     7 //   amm lexer.sc fib
       
     8 //   amm lexer.sc loops
       
     9 //   amm lexer.sc email
       
    10 //
       
    11 //   amm lexer.sc all
       
    12 
       
    13 
       
    14 
       
    15 // regular expressions including records
       
    16 abstract class Rexp 
       
    17 case object ZERO extends Rexp
       
    18 case object ONE extends Rexp
       
    19 case object ANYCHAR extends Rexp
       
    20 case class CHAR(c: Char) extends Rexp
       
    21 case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
       
    22 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
       
    23 case class STAR(r: Rexp) extends Rexp 
       
    24 case class RECD(x: String, r: Rexp) extends Rexp  
       
    25 case class NTIMES(n: Int, r: Rexp) extends Rexp
       
    26 case class OPTIONAL(r: Rexp) extends Rexp
       
    27 case class NOT(r: Rexp) extends Rexp
       
    28                 // records for extracting strings or tokens
       
    29   
       
    30 // values  
       
    31 abstract class Val
       
    32 case object Failure extends Val
       
    33 case object Empty extends Val
       
    34 case class Chr(c: Char) extends Val
       
    35 case class Sequ(v1: Val, v2: Val) extends Val
       
    36 case class Left(v: Val) extends Val
       
    37 case class Right(v: Val) extends Val
       
    38 case class Stars(vs: List[Val]) extends Val
       
    39 case class Rec(x: String, v: Val) extends Val
       
    40 case class Ntime(vs: List[Val]) extends Val
       
    41 case class Optionall(v: Val) extends Val
       
    42 case class Nots(s: String) extends Val
       
    43 
       
    44 
       
    45 
       
    46 abstract class Bit
       
    47 case object Z extends Bit
       
    48 case object S extends Bit
       
    49 
       
    50 
       
    51 type Bits = List[Bit]
       
    52 
       
    53 abstract class ARexp 
       
    54 case object AZERO extends ARexp
       
    55 case class AONE(bs: Bits) extends ARexp
       
    56 case class ACHAR(bs: Bits, c: Char) extends ARexp
       
    57 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
       
    58 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
       
    59 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
       
    60 case class ANOT(bs: Bits, r: ARexp) extends ARexp
       
    61 case class AANYCHAR(bs: Bits) extends ARexp
       
    62 
       
    63 import scala.util.Try
       
    64 
       
    65 trait Generator[+T] {
       
    66     self => // an alias for "this"
       
    67     def generate(): T
       
    68   
       
    69     def gen(n: Int) : List[T] = 
       
    70       if (n == 0) Nil else self.generate() :: gen(n - 1)
       
    71     
       
    72     def map[S](f: T => S): Generator[S] = new Generator[S] {
       
    73       def generate = f(self.generate())  
       
    74     }
       
    75     def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
       
    76       def generate = f(self.generate()).generate()
       
    77     }
       
    78 
       
    79 
       
    80 }
       
    81 
       
    82   // tests a property according to a given random generator
       
    83   def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
       
    84     for (_ <- 0 until amount) {
       
    85       val value = r.generate()
       
    86       assert(pred(value), s"Test failed for: $value")
       
    87     }
       
    88     println(s"Test passed $amount times")
       
    89   }
       
    90   def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
       
    91     for (_ <- 0 until amount) {
       
    92       val valueR = r.generate()
       
    93       val valueS = s.generate()
       
    94       assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
       
    95     }
       
    96     println(s"Test passed $amount times")
       
    97   }
       
    98 
       
    99 // random integers
       
   100 val integers = new Generator[Int] {
       
   101   val rand = new java.util.Random
       
   102   def generate() = rand.nextInt()
       
   103 }
       
   104 
       
   105 // random booleans
       
   106 val booleans = integers.map(_ > 0)
       
   107   
       
   108 // random integers in the range lo and high  
       
   109 def range(lo: Int, hi: Int): Generator[Int] = 
       
   110   for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
       
   111 
       
   112 // random characters
       
   113 def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)  
       
   114 val chars = chars_range('a', 'z')
       
   115 
       
   116 
       
   117 def oneOf[T](xs: T*): Generator[T] = 
       
   118   for (idx <- range(0, xs.length)) yield xs(idx)
       
   119   
       
   120 def single[T](x: T) = new Generator[T] {
       
   121   def generate() = x
       
   122 }   
       
   123 
       
   124 def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] = 
       
   125   for (x <- t; y <- u) yield (x, y)
       
   126 
       
   127 // lists
       
   128 def emptyLists = single(Nil) 
       
   129 
       
   130 def nonEmptyLists : Generator[List[Int]] = 
       
   131   for (head <- integers; tail <- lists) yield head :: tail
       
   132 
       
   133 def lists: Generator[List[Int]] = for {
       
   134   kind <- booleans
       
   135   list <- if (kind) emptyLists else nonEmptyLists
       
   136 } yield list
       
   137 
       
   138 def char_list(len: Int): Generator[List[Char]] = {
       
   139   if(len <= 0) single(Nil)
       
   140   else{
       
   141     for { 
       
   142       c <- chars_range('a', 'c')
       
   143       tail <- char_list(len - 1)
       
   144     } yield c :: tail
       
   145   }
       
   146 }
       
   147 
       
   148 def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
       
   149 
       
   150 def sampleString(r: Rexp) : List[String] = r match {
       
   151   case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
       
   152   case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
       
   153   case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
       
   154   case ONE => "" :: Nil
       
   155   case ZERO => Nil
       
   156   case CHAR(c) => c.toString :: Nil
       
   157 
       
   158 }
       
   159 
       
   160 def stringsFromRexp(r: Rexp) : List[String] = 
       
   161   breakIntoTerms(r).flatMap(r => sampleString(r))
       
   162 
       
   163 
       
   164 // (simple) binary trees
       
   165 trait Tree[T]
       
   166 case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
       
   167 case class Leaf[T](x: T) extends Tree[T]
       
   168 
       
   169 def leafs[T](t: Generator[T]): Generator[Leaf[T]] = 
       
   170   for (x <- t) yield Leaf(x)
       
   171 
       
   172 def inners[T](t: Generator[T]): Generator[Inner[T]] = 
       
   173   for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
       
   174 
       
   175 def trees[T](t: Generator[T]): Generator[Tree[T]] = 
       
   176   for (kind <- range(0, 2);  
       
   177        tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
       
   178 
       
   179 // regular expressions
       
   180 
       
   181 // generates random leaf-regexes; prefers CHAR-regexes
       
   182 def leaf_rexp() : Generator[Rexp] =
       
   183   for (kind <- range(0, 5);
       
   184        c <- chars_range('a', 'd')) yield
       
   185     kind match {
       
   186       case 0 => ZERO
       
   187       case 1 => ONE
       
   188       case _ => CHAR(c) 
       
   189     }
       
   190 
       
   191 // generates random inner regexes with maximum depth d
       
   192 def inner_rexp(d: Int) : Generator[Rexp] =
       
   193   for (kind <- range(0, 3);
       
   194        l <- rexp(d); 
       
   195        r <- rexp(d))
       
   196   yield kind match {
       
   197     case 0 => ALTS(l, r)
       
   198     case 1 => SEQ(l, r)
       
   199     case 2 => STAR(r)
       
   200   }
       
   201 
       
   202 // generates random regexes with maximum depth d;
       
   203 // prefers inner regexes in 2/3 of the cases
       
   204 def rexp(d: Int = 100): Generator[Rexp] = 
       
   205   for (kind <- range(0, 3);
       
   206        r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
       
   207 
       
   208 
       
   209 // some test functions for rexps
       
   210 def height(r: Rexp) : Int = r match {
       
   211   case ZERO => 1
       
   212   case ONE => 1
       
   213   case CHAR(_) => 1
       
   214   case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
       
   215   case SEQ(r1, r2) =>  1 + List(height(r1), height(r2)).max
       
   216   case STAR(r) => 1 + height(r)
       
   217 }
       
   218 
       
   219 // def size(r: Rexp) : Int = r match {
       
   220 //   case ZERO => 1
       
   221 //   case ONE => 1
       
   222 //   case CHAR(_) => 1
       
   223 //   case ALTS(r1, r2) => 1 + size(r1) + size(r2)
       
   224 //   case SEQ(r1, r2) =>  1 + size(r1) + size(r2)
       
   225 //   case STAR(r) => 1 + size(r) 
       
   226 // }
       
   227 
       
   228 // randomly subtracts 1 or 2 from the STAR case
       
   229 def size_faulty(r: Rexp) : Int = r match {
       
   230   case ZERO => 1
       
   231   case ONE => 1
       
   232   case CHAR(_) => 1
       
   233   case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
       
   234   case SEQ(r1, r2) =>  1 + size_faulty(r1) + size_faulty(r2)
       
   235   case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
       
   236 }
       
   237 
       
   238 
       
   239    
       
   240 // some convenience for typing in regular expressions
       
   241 
       
   242 def charlist2rexp(s : List[Char]): Rexp = s match {
       
   243   case Nil => ONE
       
   244   case c::Nil => CHAR(c)
       
   245   case c::s => SEQ(CHAR(c), charlist2rexp(s))
       
   246 }
       
   247 implicit def string2rexp(s : String) : Rexp = 
       
   248   charlist2rexp(s.toList)
       
   249 
       
   250 implicit def RexpOps(r: Rexp) = new {
       
   251   def | (s: Rexp) = ALTS(r, s)
       
   252   def % = STAR(r)
       
   253   def ~ (s: Rexp) = SEQ(r, s)
       
   254 }
       
   255 
       
   256 implicit def stringOps(s: String) = new {
       
   257   def | (r: Rexp) = ALTS(s, r)
       
   258   def | (r: String) = ALTS(s, r)
       
   259   def % = STAR(s)
       
   260   def ~ (r: Rexp) = SEQ(s, r)
       
   261   def ~ (r: String) = SEQ(s, r)
       
   262   def $ (r: Rexp) = RECD(s, r)
       
   263 }
       
   264 
       
   265 def nullable(r: Rexp) : Boolean = r match {
       
   266   case ZERO => false
       
   267   case ONE => true
       
   268   case CHAR(_) => false
       
   269   case ANYCHAR => false
       
   270   case ALTS(r1, r2) => nullable(r1) || nullable(r2)
       
   271   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
   272   case STAR(_) => true
       
   273   case RECD(_, r1) => nullable(r1)
       
   274   case NTIMES(n, r) => if (n == 0) true else nullable(r)
       
   275   case OPTIONAL(r) => true
       
   276   case NOT(r) => !nullable(r)
       
   277 }
       
   278 
       
   279 def der(c: Char, r: Rexp) : Rexp = r match {
       
   280   case ZERO => ZERO
       
   281   case ONE => ZERO
       
   282   case CHAR(d) => if (c == d) ONE else ZERO
       
   283   case ANYCHAR => ONE 
       
   284   case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
       
   285   case SEQ(r1, r2) => 
       
   286     if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
       
   287     else SEQ(der(c, r1), r2)
       
   288   case STAR(r) => SEQ(der(c, r), STAR(r))
       
   289   case RECD(_, r1) => der(c, r1)
       
   290   case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
       
   291   case OPTIONAL(r) => der(c, r)
       
   292   case NOT(r) =>  NOT(der(c, r))
       
   293 }
       
   294 
       
   295 
       
   296 // extracts a string from a value
       
   297 def flatten(v: Val) : String = v match {
       
   298   case Empty => ""
       
   299   case Chr(c) => c.toString
       
   300   case Left(v) => flatten(v)
       
   301   case Right(v) => flatten(v)
       
   302   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
       
   303   case Stars(vs) => vs.map(flatten).mkString
       
   304   case Ntime(vs) => vs.map(flatten).mkString
       
   305   case Optionall(v) => flatten(v)
       
   306   case Rec(_, v) => flatten(v)
       
   307 }
       
   308 
       
   309 
       
   310 // extracts an environment from a value;
       
   311 // used for tokenising a string
       
   312 def env(v: Val) : List[(String, String)] = v match {
       
   313   case Empty => Nil
       
   314   case Chr(c) => Nil
       
   315   case Left(v) => env(v)
       
   316   case Right(v) => env(v)
       
   317   case Sequ(v1, v2) => env(v1) ::: env(v2)
       
   318   case Stars(vs) => vs.flatMap(env)
       
   319   case Ntime(vs) => vs.flatMap(env)
       
   320   case Rec(x, v) => (x, flatten(v))::env(v)
       
   321   case Optionall(v) => env(v)
       
   322   case Nots(s) => ("Negative", s) :: Nil
       
   323 }
       
   324 
       
   325 
       
   326 // The injection and mkeps part of the lexer
       
   327 //===========================================
       
   328 
       
   329 def mkeps(r: Rexp) : Val = r match {
       
   330   case ONE => Empty
       
   331   case ALTS(r1, r2) => 
       
   332     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
   333   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   334   case STAR(r) => Stars(Nil)
       
   335   case RECD(x, r) => Rec(x, mkeps(r))
       
   336   case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
       
   337   case OPTIONAL(r) => Optionall(Empty)
       
   338   case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")  
       
   339                          else Nots("")//Nots(s.reverse.toString)
       
   340 //   case NOT(ZERO) => Empty
       
   341 //   case NOT(CHAR(c)) => Empty
       
   342 //   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
       
   343 //   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
       
   344 //   case NOT(STAR(r)) => Stars(Nil) 
       
   345 
       
   346 }
       
   347 
       
   348 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   349   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   350   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   351   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   352   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   353   case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   354   case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   355   case (CHAR(d), Empty) => Chr(c) 
       
   356   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
       
   357   case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
       
   358   case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
       
   359   case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
       
   360   case (ANYCHAR, Empty) => Chr(c)
       
   361 }
       
   362 
       
   363     def size(r: Rexp) : Int = r match {
       
   364       case ZERO => 1
       
   365       case ONE => 1
       
   366       case CHAR(_) => 1
       
   367       case ANYCHAR => 1
       
   368       case NOT(r0) => 1 + size(r0)
       
   369       case SEQ(r1, r2) => 1 + size(r1) + size(r2)
       
   370       case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
       
   371       case STAR(r) => 1 + size(r)
       
   372     }
       
   373 
       
   374 //almost as if a zeroable in called on each subexpression, and if so, turn that sub-expression into 0
       
   375 def light_simp(r: Rexp): Rexp = r match {
       
   376   case ALTS(r1, r2) => (light_simp(r1), light_simp(r2)) match {
       
   377     case (ZERO, ZERO) => ZERO
       
   378     case (r1p, r2p) => ALTS(r1p, r2p)
       
   379   }
       
   380   case SEQ(r1, r2) => (light_simp(r1), light_simp(r2)) match {
       
   381     case (ZERO, _) => ZERO
       
   382     case (_, ZERO) => ZERO
       
   383     case (r1p, r2p) => SEQ(r1p, r2p)
       
   384   }
       
   385   case STAR(r) => STAR(r)
       
   386   case CHAR(d) => CHAR(d)
       
   387   case ONE => ONE
       
   388   case ZERO => ZERO
       
   389 }
       
   390     
       
   391 
       
   392 
       
   393 def simp_lexer(r: Rexp, s: List[Char]): Val = s match{
       
   394   case Nil => {//println("mkeps!"); println(r); 
       
   395     val v = mkeps(r); 
       
   396     //println(v); 
       
   397     v}
       
   398   case c::cs => { 
       
   399     val derc1 = der(c, r);
       
   400     val derc = light_simp(derc1);
       
   401     println("derivative size: "+ size(derc));
       
   402     val v = simp_lexer(derc, cs); 
       
   403     //println("Injection!");println("r: "+r); println("c: "+c); println("v:"+v); 
       
   404     inj(r, c, v ) }
       
   405 }
       
   406 
       
   407 
       
   408 def print_lexer(r: Rexp, s: List[Char]): Val = s match{
       
   409   case Nil => {//println("mkeps!"); println(r); 
       
   410     val v = mkeps(r); 
       
   411     //println(v); 
       
   412     v}
       
   413   case c::cs => { 
       
   414     val derc = der(c, r);
       
   415     //val derc = light_simp(derc1);
       
   416     println("derivative size: "+ size(derc));
       
   417     val v = print_lexer(derc, cs); 
       
   418     //println("Injection!");println("r: "+r); println("c: "+c); println("v:"+v); 
       
   419     inj(r, c, v ) }
       
   420 }
       
   421 val r = STAR("a"|"aa") ~ ("c")
       
   422 println("unsimplified lexer's intermediate derivative sizes:")
       
   423 val v = print_lexer(r, "aac".toList)
       
   424 println("simplified lexer's intermediate derivative sizes:")
       
   425 val v2 = simp_lexer(r, "aac".toList)
       
   426 //println(v)
       
   427 
       
   428 // some "rectification" functions for simplification
       
   429 
       
   430 
       
   431 
       
   432 
       
   433 // The Lexing Rules for the WHILE Language
       
   434 
       
   435   // bnullable function: tests whether the aregular 
       
   436   // expression can recognise the empty string
       
   437 def bnullable (r: ARexp) : Boolean = r match {
       
   438     case AZERO => false
       
   439     case AONE(_) => true
       
   440     case ACHAR(_,_) => false
       
   441     case AALTS(_, rs) => rs.exists(bnullable)
       
   442     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
       
   443     case ASTAR(_, _) => true
       
   444     case ANOT(_, rn) => !bnullable(rn)
       
   445   }
       
   446 
       
   447 def mkepsBC(r: ARexp) : Bits = r match {
       
   448     case AONE(bs) => bs
       
   449     case AALTS(bs, rs) => {
       
   450       val n = rs.indexWhere(bnullable)
       
   451       bs ++ mkepsBC(rs(n))
       
   452     }
       
   453     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
       
   454     case ASTAR(bs, r) => bs ++ List(Z)
       
   455     case ANOT(bs, rn) => bs
       
   456   }
       
   457 
       
   458 
       
   459 def bder(c: Char, r: ARexp) : ARexp = r match {
       
   460     case AZERO => AZERO
       
   461     case AONE(_) => AZERO
       
   462     case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
       
   463     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
       
   464     case ASEQ(bs, r1, r2) => 
       
   465       if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
       
   466       else ASEQ(bs, bder(c, r1), r2)
       
   467     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
       
   468     case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
       
   469     case AANYCHAR(bs) => AONE(bs)
       
   470   } 
       
   471 
       
   472 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
       
   473     case AZERO => AZERO
       
   474     case AONE(cs) => AONE(bs ++ cs)
       
   475     case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
       
   476     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
       
   477     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
       
   478     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
       
   479     case ANOT(cs, r) => ANOT(bs ++ cs, r)
       
   480   }
       
   481 
       
   482 
       
   483 def internalise(r: Rexp) : ARexp = r match {
       
   484     case ZERO => AZERO
       
   485     case ONE => AONE(Nil)
       
   486     case CHAR(c) => ACHAR(Nil, c)
       
   487     //case PRED(f) => APRED(Nil, f)
       
   488     case ALTS(r1, r2) => 
       
   489       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
       
   490     // case ALTS(r1::rs) => {
       
   491     //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
       
   492     //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
       
   493     // }
       
   494     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
       
   495     case STAR(r) => ASTAR(Nil, internalise(r))
       
   496     case RECD(x, r) => internalise(r)
       
   497     case NOT(r) => ANOT(Nil, internalise(r))
       
   498     case ANYCHAR => AANYCHAR(Nil)
       
   499   }
       
   500 
       
   501 
       
   502 def bsimp(r: ARexp): ARexp = 
       
   503   {
       
   504     r match {
       
   505       case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
       
   506           case (AZERO, _) => AZERO
       
   507           case (_, AZERO) => AZERO
       
   508           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   509           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   510       }
       
   511       case AALTS(bs1, rs) => {
       
   512             val rs_simp = rs.map(bsimp(_))
       
   513             val flat_res = flats(rs_simp)
       
   514             val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
       
   515             dist_res match {
       
   516               case Nil => AZERO
       
   517               case s :: Nil => fuse(bs1, s)
       
   518               case rs => AALTS(bs1, rs)  
       
   519             }
       
   520           
       
   521       }
       
   522       case r => r
       
   523     }
       
   524 }
       
   525 def strongBsimp(r: ARexp): ARexp =
       
   526 {
       
   527   r match {
       
   528     case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
       
   529         case (AZERO, _) => AZERO
       
   530         case (_, AZERO) => AZERO
       
   531         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   532         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   533     }
       
   534     case AALTS(bs1, rs) => {
       
   535           val rs_simp = rs.map(strongBsimp(_))
       
   536           val flat_res = flats(rs_simp)
       
   537           val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
       
   538           dist_res match {
       
   539             case Nil => AZERO
       
   540             case s :: Nil => fuse(bs1, s)
       
   541             case rs => AALTS(bs1, rs)  
       
   542           }
       
   543         
       
   544     }
       
   545     case r => r
       
   546   }
       
   547 }
       
   548 
       
   549 def strongBsimp5(r: ARexp): ARexp =
       
   550 {
       
   551   // println("was this called?")
       
   552   r match {
       
   553     case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
       
   554         case (AZERO, _) => AZERO
       
   555         case (_, AZERO) => AZERO
       
   556         case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
       
   557         case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
       
   558     }
       
   559     case AALTS(bs1, rs) => {
       
   560         // println("alts case")
       
   561           val rs_simp = rs.map(strongBsimp5(_))
       
   562           val flat_res = flats(rs_simp)
       
   563           var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
       
   564           var dist2_res = distinctBy5(dist_res)
       
   565           while(dist_res != dist2_res){
       
   566             dist_res = dist2_res
       
   567             dist2_res = distinctBy5(dist_res)
       
   568           }
       
   569           (dist2_res) match {
       
   570             case Nil => AZERO
       
   571             case s :: Nil => fuse(bs1, s)
       
   572             case rs => AALTS(bs1, rs)  
       
   573           }
       
   574     }
       
   575     case r => r
       
   576   }
       
   577 }
       
   578 
       
   579 def bders (s: List[Char], r: ARexp) : ARexp = s match {
       
   580   case Nil => r
       
   581   case c::s => bders(s, bder(c, r))
       
   582 }
       
   583 
       
   584 def flats(rs: List[ARexp]): List[ARexp] = rs match {
       
   585     case Nil => Nil
       
   586     case AZERO :: rs1 => flats(rs1)
       
   587     case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       
   588     case r1 :: rs2 => r1 :: flats(rs2)
       
   589   }
       
   590 
       
   591 def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
       
   592   case Nil => Nil
       
   593   case (x::xs) => {
       
   594     val res = f(x)
       
   595     if (acc.contains(res)) distinctBy(xs, f, acc)  
       
   596     else x::distinctBy(xs, f, res::acc)
       
   597   }
       
   598 } 
       
   599 
       
   600 
       
   601 def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
       
   602   r match {
       
   603     case ASEQ(bs, r1, r2) => 
       
   604       val termsTruncated = allowableTerms.collect(rt => rt match {
       
   605         case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2)) 
       
   606       })
       
   607       val pruned : ARexp = pruneRexp(r1, termsTruncated)
       
   608       pruned match {
       
   609         case AZERO => AZERO
       
   610         case AONE(bs1) => fuse(bs ++ bs1, r2)
       
   611         case pruned1 => ASEQ(bs, pruned1, r2)
       
   612       }
       
   613 
       
   614     case AALTS(bs, rs) => 
       
   615       //allowableTerms.foreach(a => println(shortRexpOutput(a)))        
       
   616       val rsp = rs.map(r => 
       
   617                     pruneRexp(r, allowableTerms)
       
   618                   )
       
   619                   .filter(r => r != AZERO)
       
   620       rsp match {
       
   621         case Nil => AZERO
       
   622         case r1::Nil => fuse(bs, r1)
       
   623         case rs1 => AALTS(bs, rs1)
       
   624       }
       
   625     case r => 
       
   626       if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
       
   627   }
       
   628 }
       
   629 
       
   630 def oneSimp(r: Rexp) : Rexp = r match {
       
   631   case SEQ(ONE, r) => r
       
   632   case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
       
   633   case r => r//assert r != 0 
       
   634     
       
   635 }
       
   636 
       
   637 
       
   638 def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
       
   639   case Nil => Nil
       
   640   case x :: xs =>
       
   641     //assert(acc.distinct == acc)
       
   642     val erased = erase(x)
       
   643     if(acc.contains(erased))
       
   644       distinctBy4(xs, acc)
       
   645     else{
       
   646       val addToAcc =  breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
       
   647       //val xp = pruneRexp(x, addToAcc)
       
   648       pruneRexp(x, addToAcc) match {
       
   649         case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   650         case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   651       }
       
   652     }
       
   653 }
       
   654 
       
   655 // fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
       
   656 //   where
       
   657 // "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
       
   658 //                           case r of (ASEQ bs r1 r2) ⇒ 
       
   659 //                             bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ  (erase r2) ( ctx) )) r2   |
       
   660 //                                     (AALTs bs rs) ⇒ 
       
   661 //                             bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) )    |
       
   662 //                                     r             ⇒ r
       
   663 
       
   664 // def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
       
   665 //   case r::Nil => SEQ(r, acc)
       
   666 //   case Nil => acc
       
   667 //   case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
       
   668 // }
       
   669 
       
   670 
       
   671 //the "fake" Language interpretation: just concatenates!
       
   672 def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
       
   673   case Nil => acc
       
   674   case r :: rs1 => 
       
   675     // if(acc == ONE) 
       
   676     //   L(r, rs1) 
       
   677     // else
       
   678       L(SEQ(acc, r), rs1)
       
   679 }
       
   680 
       
   681 def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
       
   682 def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
       
   683 def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
       
   684 def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
       
   685 
       
   686 def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
       
   687   // println("pakc")
       
   688   // println(shortRexpOutput(erase(r)))
       
   689   // println("acc")
       
   690   // rsprint(acc)
       
   691   // println("ctx---------")
       
   692   // rsprint(ctx)
       
   693   // println("ctx---------end")
       
   694   // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
       
   695 
       
   696   if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
       
   697     AZERO
       
   698   }
       
   699   else{
       
   700     r match {
       
   701       case ASEQ(bs, r1, r2) => 
       
   702       (pAKC(acc, r1, erase(r2) :: ctx)) match{
       
   703         case AZERO => 
       
   704           AZERO
       
   705         case AONE(bs1) => 
       
   706           fuse(bs1, r2)
       
   707         case r1p => 
       
   708           ASEQ(bs, r1p, r2)
       
   709       }
       
   710       case AALTS(bs, rs0) => 
       
   711         // println("before pruning")
       
   712         // println(s"ctx is ")
       
   713         // ctx.foreach(r => println(shortRexpOutput(r)))
       
   714         // println(s"rs0 is ")
       
   715         // rs0.foreach(r => println(shortRexpOutput(erase(r))))
       
   716         // println(s"acc is ")
       
   717         // acc.foreach(r => println(shortRexpOutput(r)))
       
   718         rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
       
   719           case Nil => 
       
   720             // println("after pruning Nil")
       
   721             AZERO
       
   722           case r :: Nil => 
       
   723             // println("after pruning singleton")
       
   724             // println(shortRexpOutput(erase(r)))
       
   725             r 
       
   726           case rs0p => 
       
   727           // println("after pruning non-singleton")
       
   728             AALTS(bs, rs0p)
       
   729         }
       
   730       case r => r
       
   731     }
       
   732   }
       
   733 }
       
   734 
       
   735 def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
       
   736   case Nil => 
       
   737     Nil
       
   738   case x :: xs => {
       
   739     val erased = erase(x)
       
   740     if(acc.contains(erased)){
       
   741       distinctBy5(xs, acc)
       
   742     }
       
   743     else{
       
   744       val pruned = pAKC(acc, x, Nil)
       
   745       val newTerms = breakIntoTerms(erase(pruned))
       
   746       pruned match {
       
   747         case AZERO => 
       
   748           distinctBy5(xs, acc)
       
   749         case xPrime => 
       
   750           xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
       
   751       }
       
   752     }
       
   753   }
       
   754 }
       
   755 
       
   756 
       
   757 def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
       
   758   case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
       
   759   case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
       
   760   case ZERO => Nil
       
   761   case _ => r::Nil
       
   762 }
       
   763 
       
   764 
       
   765 
       
   766 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
       
   767   case (ONE, bs) => (Empty, bs)
       
   768   case (CHAR(f), bs) => (Chr(f), bs)
       
   769   case (ALTS(r1, r2), Z::bs1) => {
       
   770       val (v, bs2) = decode_aux(r1, bs1)
       
   771       (Left(v), bs2)
       
   772   }
       
   773   case (ALTS(r1, r2), S::bs1) => {
       
   774       val (v, bs2) = decode_aux(r2, bs1)
       
   775       (Right(v), bs2)
       
   776   }
       
   777   case (SEQ(r1, r2), bs) => {
       
   778     val (v1, bs1) = decode_aux(r1, bs)
       
   779     val (v2, bs2) = decode_aux(r2, bs1)
       
   780     (Sequ(v1, v2), bs2)
       
   781   }
       
   782   case (STAR(r1), S::bs) => {
       
   783     val (v, bs1) = decode_aux(r1, bs)
       
   784     //(v)
       
   785     val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
       
   786     (Stars(v::vs), bs2)
       
   787   }
       
   788   case (STAR(_), Z::bs) => (Stars(Nil), bs)
       
   789   case (RECD(x, r1), bs) => {
       
   790     val (v, bs1) = decode_aux(r1, bs)
       
   791     (Rec(x, v), bs1)
       
   792   }
       
   793   case (NOT(r), bs) => (Nots(r.toString), bs)
       
   794 }
       
   795 
       
   796 def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
       
   797   case (v, Nil) => v
       
   798   case _ => throw new Exception("Not decodable")
       
   799 }
       
   800 
       
   801 
       
   802 def blexing(r: Rexp, s: String) : Option[Val] = {
       
   803     val bit_code = blex(internalise(r), s.toList)
       
   804     bit_code match {
       
   805       case Some(bits) => Some(decode(r, bits))
       
   806       case None => None
       
   807     }
       
   808 }
       
   809 
       
   810 def blexing_simp(r: Rexp, s: String) : Option[Val] = {
       
   811     val bit_code = blex_simp(internalise(r), s.toList)
       
   812     bit_code match {
       
   813       case Some(bits) => Some(decode(r, bits))
       
   814       case None => None
       
   815     }
       
   816 }
       
   817 //def simpBlexer(r: Rexp, s: String) : Val = {
       
   818 //  Try(blexing_simp(r, s)).getOrElse(Failure)
       
   819 //}
       
   820 
       
   821 def strong_blexing_simp(r: Rexp, s: String) : Val = {
       
   822   decode(r, strong_blex_simp(internalise(r), s.toList))
       
   823 }
       
   824 
       
   825 def strong_blexing_simp5(r: Rexp, s: String) : Val = {
       
   826   decode(r, strong_blex_simp5(internalise(r), s.toList))
       
   827 }
       
   828 
       
   829 
       
   830 def strongBlexer(r: Rexp, s: String) : Val = {
       
   831   Try(strong_blexing_simp(r, s)).getOrElse(Failure)
       
   832 }
       
   833 
       
   834 def strongBlexer5(r: Rexp, s: String): Val = {
       
   835   Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
       
   836 }
       
   837 
       
   838 def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
       
   839   case Nil => {
       
   840     if (bnullable(r)) {
       
   841       //println(asize(r))
       
   842       val bits = mkepsBC(r)
       
   843       bits
       
   844     }
       
   845     else 
       
   846       throw new Exception("Not matched")
       
   847   }
       
   848   case c::cs => {
       
   849     val der_res = bder(c,r)
       
   850     val simp_res = strongBsimp(der_res)  
       
   851     strong_blex_simp(simp_res, cs)      
       
   852   }
       
   853 }
       
   854 
       
   855 def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
       
   856   case Nil => {
       
   857     if (bnullable(r)) {
       
   858       //println(asize(r))
       
   859       val bits = mkepsBC(r)
       
   860       bits
       
   861     }
       
   862     else 
       
   863       throw new Exception("Not matched")
       
   864   }
       
   865   case c::cs => {
       
   866     val der_res = bder(c,r)
       
   867     val simp_res = strongBsimp5(der_res)  
       
   868     strong_blex_simp5(simp_res, cs)      
       
   869   }
       
   870 }
       
   871 
       
   872 
       
   873   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
       
   874     case Nil => r
       
   875     case c::s => 
       
   876       //println(erase(r))
       
   877       bders_simp(s, bsimp(bder(c, r)))
       
   878   }
       
   879   
       
   880   def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
       
   881     case Nil => r
       
   882     case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
       
   883   }
       
   884 
       
   885   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
       
   886 
       
   887   def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
       
   888     case Nil => r 
       
   889     case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
       
   890   }
       
   891 
       
   892   def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
       
   893 
       
   894   def erase(r:ARexp): Rexp = r match{
       
   895     case AZERO => ZERO
       
   896     case AONE(_) => ONE
       
   897     case ACHAR(bs, c) => CHAR(c)
       
   898     case AALTS(bs, Nil) => ZERO
       
   899     case AALTS(bs, a::Nil) => erase(a)
       
   900     case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
       
   901     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
       
   902     case ASTAR(cs, r)=> STAR(erase(r))
       
   903     case ANOT(bs, r) => NOT(erase(r))
       
   904     case AANYCHAR(bs) => ANYCHAR
       
   905   }
       
   906 
       
   907 
       
   908   def allCharSeq(r: Rexp) : Boolean = r match {
       
   909     case CHAR(c) => true
       
   910     case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
       
   911     case _ => false
       
   912   }
       
   913 
       
   914   def flattenSeq(r: Rexp) : String = r match {
       
   915     case CHAR(c) => c.toString
       
   916     case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
       
   917     case _ => throw new Error("flatten unflattenable rexp")
       
   918   } 
       
   919 
       
   920   def shortRexpOutput(r: Rexp) : String = r match {
       
   921       case CHAR(c) => c.toString
       
   922       case ONE => "1"
       
   923       case ZERO => "0"
       
   924       case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
       
   925       case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
       
   926       case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
       
   927       case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
       
   928       case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
       
   929       //case RTOP => "RTOP"
       
   930     }
       
   931 
       
   932   def blex(r: ARexp, s: List[Char]) : Option[Bits]= s match {
       
   933       case Nil => {
       
   934         if (bnullable(r)) {
       
   935           val bits = mkepsBC(r)
       
   936           Some(bits)
       
   937         }
       
   938         else 
       
   939           None
       
   940           //throw new Exception("Not matched")
       
   941       }
       
   942       case c::cs => {
       
   943         val der_res = bder(c,r)
       
   944         //val simp_res = bsimp(der_res)  
       
   945         blex(der_res, cs)      
       
   946       }
       
   947   }
       
   948 
       
   949   def blex_simp(r: ARexp, s: List[Char]) : Option[Bits]= s match {
       
   950       case Nil => {
       
   951         if (bnullable(r)) {
       
   952           val bits = mkepsBC(r)
       
   953           Some(bits)
       
   954         }
       
   955         else 
       
   956           None
       
   957           //throw new Exception("Not matched")
       
   958       }
       
   959       case c::cs => {
       
   960         val der_res = bder(c,r)
       
   961         val simp_res = bsimp(der_res)  
       
   962         blex_simp(simp_res, cs)      
       
   963       }
       
   964   }
       
   965 
       
   966 
       
   967 
       
   968 
       
   969 
       
   970     def asize(a: ARexp) = size(erase(a))
       
   971 
       
   972 //pder related
       
   973 type Mon = (Char, Rexp)
       
   974 type Lin = Set[Mon]
       
   975 
       
   976 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
       
   977     case ZERO => Set()
       
   978     case ONE => rs
       
   979     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
       
   980   }
       
   981   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
       
   982     case ZERO => Set()
       
   983     case ONE => l
       
   984     case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
       
   985   }
       
   986   def lf(r: Rexp): Lin = r match {
       
   987     case ZERO => Set()
       
   988     case ONE => Set()
       
   989     case CHAR(f) => {
       
   990       //val Some(c) = alphabet.find(f) 
       
   991       Set((f, ONE))
       
   992     }
       
   993     case ALTS(r1, r2) => {
       
   994       lf(r1 ) ++ lf(r2)
       
   995     }
       
   996     case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
       
   997     case SEQ(r1, r2) =>{
       
   998       if (nullable(r1))
       
   999         cir_prod(lf(r1), r2) ++ lf(r2)
       
  1000       else
       
  1001         cir_prod(lf(r1), r2) 
       
  1002     }
       
  1003   }
       
  1004   def lfs(r: Set[Rexp]): Lin = {
       
  1005     r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
       
  1006   }
       
  1007 
       
  1008   def pder(x: Char, t: Rexp): Set[Rexp] = {
       
  1009     val lft = lf(t)
       
  1010     (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
       
  1011   }
       
  1012   def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
       
  1013     case x::xs => pders(xs, pder(x, t))
       
  1014     case Nil => Set(t)
       
  1015   }
       
  1016   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
       
  1017     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
  1018     case Nil => ts 
       
  1019   }
       
  1020   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
       
  1021   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
       
  1022   //all implementation of partial derivatives that involve set union are potentially buggy
       
  1023   //because they don't include the original regular term before they are pdered.
       
  1024   //now only pderas is fixed.
       
  1025   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
       
  1026   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
       
  1027   def awidth(r: Rexp) : Int = r match {
       
  1028     case CHAR(c) => 1
       
  1029     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
       
  1030     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
       
  1031     case ONE => 0
       
  1032     case ZERO => 0
       
  1033     case STAR(r) => awidth(r)
       
  1034   }
       
  1035   //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
       
  1036   //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
       
  1037   def o(r: Rexp) = if (nullable(r)) ONE else ZERO
       
  1038   //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
       
  1039   def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
       
  1040     case ZERO => Set[Rexp]()
       
  1041     case ONE => Set[Rexp]()
       
  1042     case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
       
  1043     case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
       
  1044     case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
       
  1045     case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
       
  1046   }
       
  1047   def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
       
  1048     case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
       
  1049     case Nil => ts   
       
  1050   }
       
  1051   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
       
  1052 
       
  1053 
       
  1054 
       
  1055 def starPrint(r: Rexp) : Unit = r match {
       
  1056         
       
  1057           case SEQ(head, rstar) =>
       
  1058             println(shortRexpOutput(head) ++ "~STARREG")
       
  1059           case STAR(rstar) =>
       
  1060             println("STARREG")
       
  1061           case ALTS(r1, r2) =>  
       
  1062             println("(")
       
  1063             starPrint(r1)
       
  1064             println("+")
       
  1065             starPrint(r2)
       
  1066             println(")")
       
  1067           case ZERO => println("0")
       
  1068       }
       
  1069 
       
  1070 // @arg(doc = "small tests")
       
  1071 def n_astar_list(d: Int) : Rexp = {
       
  1072   if(d == 0) STAR("a") 
       
  1073   else ALTS(STAR("a" * d), n_astar_list(d - 1))
       
  1074 }
       
  1075 def n_astar_alts(d: Int) : Rexp = d match {
       
  1076   case 0 => n_astar_list(0)
       
  1077   case d => STAR(n_astar_list(d))
       
  1078   // case r1 :: r2 :: Nil => ALTS(r1, r2)
       
  1079   // case r1 :: Nil => r1
       
  1080   // case r :: rs => ALTS(r, n_astar_alts(rs))
       
  1081   // case Nil => throw new Error("should give at least 1 elem")
       
  1082 }
       
  1083 def n_astar_aux(d: Int) = {
       
  1084   if(d == 0) n_astar_alts(0)
       
  1085   else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
       
  1086 }
       
  1087 
       
  1088 def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
       
  1089 //val STARREG = n_astar(3)
       
  1090 // ( STAR("a") | 
       
  1091 //                  ("a" | "aa").% | 
       
  1092 //                 ( "a" | "aa" | "aaa").% 
       
  1093 //                 ).%
       
  1094                 //( "a" | "aa" | "aaa" | "aaaa").% |
       
  1095                 //( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").% 
       
  1096 (((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
       
  1097 
       
  1098 // @main
       
  1099 
       
  1100 def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
       
  1101   (a, b) => b * a /
       
  1102   Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
       
  1103 }
       
  1104 
       
  1105 def small() = {
       
  1106   //val pderSTAR = pderUNIV(STARREG)
       
  1107 
       
  1108   //val refSize = pderSTAR.map(size(_)).sum
       
  1109   for(n <- 1 to 12){
       
  1110     val STARREG = n_astar_alts(n)
       
  1111     val iMax = (lcm((1 to n).toList))
       
  1112     for(i <- 1 to iMax + 1 ){// 100, 400, 800, 840, 841, 900 
       
  1113       val prog0 = "a" * i
       
  1114       //println(s"test: $prog0")
       
  1115       print(n)
       
  1116       print(" ")
       
  1117       // print(i)
       
  1118       // print(" ")
       
  1119       println(asize(bders_simp(prog0.toList, internalise(STARREG))))
       
  1120     }
       
  1121   }
       
  1122 }
       
  1123 
       
  1124 def generator_test() {
       
  1125 
       
  1126   // test(rexp(7), 1000) { (r: Rexp) => 
       
  1127   //   val ss = stringsFromRexp(r)
       
  1128   //   val boolList = ss.map(s => {
       
  1129   //     val simpVal = simpBlexer(r, s)
       
  1130   //     val strongVal = strongBlexer(r, s)
       
  1131   //     // println(simpVal)
       
  1132   //     // println(strongVal)
       
  1133   //     (simpVal == strongVal) && (simpVal != None) 
       
  1134   //   })
       
  1135   //   !boolList.exists(b => b == false)
       
  1136   // }
       
  1137   // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => 
       
  1138   //   val ss = stringsFromRexp(r)
       
  1139   //   val boolList = ss.map(s => {
       
  1140   //     val bdStrong = bdersStrong(s.toList, internalise(r))
       
  1141   //     val bdStrong5 = bdersStrong5(s.toList, internalise(r))
       
  1142   //     // println(shortRexpOutput(r))
       
  1143   //     // println(s)
       
  1144   //     // println(strongVal)
       
  1145   //     // println(strongVal5)
       
  1146   //     // (strongVal == strongVal5) 
       
  1147 
       
  1148   //     if(asize(bdStrong5) > asize(bdStrong)){
       
  1149   //       println(s)
       
  1150   //       println(shortRexpOutput(erase(bdStrong5)))
       
  1151   //       println(shortRexpOutput(erase(bdStrong)))
       
  1152   //     }
       
  1153   //     asize(bdStrong5) <= asize(bdStrong)
       
  1154   //   })
       
  1155   //   !boolList.exists(b => b == false)
       
  1156   // }
       
  1157   //*** example where bdStrong5 has a smaller size than bdStrong
       
  1158   // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => 
       
  1159   //*** depth 5 example bdStrong5 larger size than bdStrong
       
  1160   // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
       
  1161  
       
  1162  
       
  1163  
       
  1164   //sanity check from Christian's request
       
  1165   // val r = ("a" | "ab") ~ ("bc" | "c")
       
  1166   // val a = internalise(r)
       
  1167   // val aval = blexing_simp(r, "abc")
       
  1168   // println(aval)
       
  1169 
       
  1170   //sample counterexample:(depth 7)
       
  1171   //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
       
  1172   //(depth5)
       
  1173   //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
       
  1174   test(rexp(4), 10000000) { (r: Rexp) => 
       
  1175   // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => 
       
  1176     val ss = stringsFromRexp(r)
       
  1177     val boolList = ss.map(s => {
       
  1178       val bdStrong = bdersStrong(s.toList, internalise(r))
       
  1179       val bdStrong5 = bdersStrong5(s.toList, internalise(r))
       
  1180       val bdFurther5 = strongBsimp5(bdStrong5)
       
  1181       val sizeFurther5 = asize(bdFurther5)
       
  1182       val pdersSet = pderUNIV(r)
       
  1183       val size5 = asize(bdStrong5)
       
  1184       val size0 = asize(bdStrong)
       
  1185       // println(s)
       
  1186       // println("pdersSet size")
       
  1187       // println(pdersSet.size)
       
  1188       // pdersSet.foreach(r => println(shortRexpOutput(r)))
       
  1189       // println("after bdStrong5")
       
  1190       
       
  1191       // println(shortRexpOutput(erase(bdStrong5)))
       
  1192       // println(breakIntoTerms(erase(bdStrong5)).size)
       
  1193       
       
  1194       // println("after bdStrong")
       
  1195       // println(shortRexpOutput(erase(bdStrong)))
       
  1196       // println(breakIntoTerms(erase(bdStrong)).size)
       
  1197       // println(size5, size0, sizeFurther5)
       
  1198       // aprint(strongBsimp5(bdStrong))
       
  1199       // println(asize(strongBsimp5(bdStrong5)))
       
  1200       // println(s)
       
  1201       size5 <= size0 
       
  1202     })
       
  1203     // println(boolList)
       
  1204     //!boolList.exists(b => b == false)
       
  1205     !boolList.exists(b => b == false)
       
  1206   }
       
  1207 
       
  1208 
       
  1209 }
       
  1210 //small()
       
  1211 // generator_test()
       
  1212 
       
  1213 // 1
       
  1214 
       
  1215 // SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
       
  1216 // SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
       
  1217 // STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
       
  1218 // SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
       
  1219 
       
  1220 
       
  1221 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
       
  1222 // Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
       
  1223 
       
  1224 
       
  1225 //def time[R](block: => R): R = {
       
  1226 //    val t0 = System.nanoTime()
       
  1227 //    val result = block    // call-by-name
       
  1228 //    val t1 = System.nanoTime()
       
  1229 //    println(" " + (t1 - t0)/1e9 + "")
       
  1230 //    result
       
  1231 //}
       
  1232 //val astarstarB  = STAR(STAR("a"))~("b")
       
  1233 //val data_points_x_axis = List.range(0, 60000, 3000)
       
  1234 //for (i <- data_points_x_axis) {print(i); time { blexing(astarstarB, ("a"*i)) } }
       
  1235 ////println(data_points_x_axis.zip( List(0,1,3,4)))
       
  1236 //for(i <- List.range(0, 40000, 3000)) print(i + " ")
       
  1237