| 403 |      1 | // A simple lexer inspired by work of Sulzmann & Lu
 | 
|  |      2 | //==================================================
 | 
|  |      3 | //
 | 
|  |      4 | // Call the test cases with 
 | 
|  |      5 | //
 | 
|  |      6 | //   amm lexer.sc small
 | 
|  |      7 | //   amm lexer.sc fib
 | 
|  |      8 | //   amm lexer.sc loops
 | 
|  |      9 | //   amm lexer.sc email
 | 
|  |     10 | //
 | 
|  |     11 | //   amm lexer.sc all
 | 
|  |     12 | 
 | 
|  |     13 | 
 | 
|  |     14 | // regular expressions including records
 | 
|  |     15 | abstract class Rexp 
 | 
|  |     16 | case object ZERO extends Rexp
 | 
|  |     17 | case object ONE extends Rexp
 | 
|  |     18 | case object ANYCHAR extends Rexp
 | 
|  |     19 | case class CHAR(c: Char) extends Rexp
 | 
|  |     20 | case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
 | 
|  |     21 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
 | 
|  |     22 | case class STAR(r: Rexp) extends Rexp 
 | 
|  |     23 | case class RECD(x: String, r: Rexp) extends Rexp  
 | 
|  |     24 | case class NTIMES(n: Int, r: Rexp) extends Rexp
 | 
|  |     25 | case class OPTIONAL(r: Rexp) extends Rexp
 | 
|  |     26 | case class NOT(r: Rexp) extends Rexp
 | 
|  |     27 |                 // records for extracting strings or tokens
 | 
|  |     28 |   
 | 
|  |     29 | // values  
 | 
|  |     30 | abstract class Val
 | 
|  |     31 | case object Empty extends Val
 | 
|  |     32 | case class Chr(c: Char) extends Val
 | 
|  |     33 | case class Sequ(v1: Val, v2: Val) extends Val
 | 
|  |     34 | case class Left(v: Val) extends Val
 | 
|  |     35 | case class Right(v: Val) extends Val
 | 
|  |     36 | case class Stars(vs: List[Val]) extends Val
 | 
|  |     37 | case class Rec(x: String, v: Val) extends Val
 | 
|  |     38 | case class Ntime(vs: List[Val]) extends Val
 | 
|  |     39 | case class Optionall(v: Val) extends Val
 | 
|  |     40 | case class Nots(s: String) extends Val
 | 
|  |     41 | 
 | 
|  |     42 | 
 | 
|  |     43 | 
 | 
|  |     44 | abstract class Bit
 | 
|  |     45 | case object Z extends Bit
 | 
|  |     46 | case object S extends Bit
 | 
|  |     47 | 
 | 
|  |     48 | 
 | 
|  |     49 | type Bits = List[Bit]
 | 
|  |     50 | 
 | 
|  |     51 | abstract class ARexp 
 | 
|  |     52 | case object AZERO extends ARexp
 | 
|  |     53 | case class AONE(bs: Bits) extends ARexp
 | 
|  |     54 | case class ACHAR(bs: Bits, c: Char) extends ARexp
 | 
|  |     55 | case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
 | 
|  |     56 | case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
 | 
|  |     57 | case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
 | 
|  |     58 | case class ANOT(bs: Bits, r: ARexp) extends ARexp
 | 
|  |     59 | case class AANYCHAR(bs: Bits) extends ARexp
 | 
|  |     60 | 
 | 
|  |     61 | 
 | 
|  |     62 |    
 | 
|  |     63 | // some convenience for typing in regular expressions
 | 
|  |     64 | 
 | 
|  |     65 | def charlist2rexp(s : List[Char]): Rexp = s match {
 | 
|  |     66 |   case Nil => ONE
 | 
|  |     67 |   case c::Nil => CHAR(c)
 | 
|  |     68 |   case c::s => SEQ(CHAR(c), charlist2rexp(s))
 | 
|  |     69 | }
 | 
|  |     70 | implicit def string2rexp(s : String) : Rexp = 
 | 
|  |     71 |   charlist2rexp(s.toList)
 | 
|  |     72 | 
 | 
|  |     73 | implicit def RexpOps(r: Rexp) = new {
 | 
|  |     74 |   def | (s: Rexp) = ALTS(r, s)
 | 
|  |     75 |   def % = STAR(r)
 | 
|  |     76 |   def ~ (s: Rexp) = SEQ(r, s)
 | 
|  |     77 | }
 | 
|  |     78 | 
 | 
|  |     79 | implicit def stringOps(s: String) = new {
 | 
|  |     80 |   def | (r: Rexp) = ALTS(s, r)
 | 
|  |     81 |   def | (r: String) = ALTS(s, r)
 | 
|  |     82 |   def % = STAR(s)
 | 
|  |     83 |   def ~ (r: Rexp) = SEQ(s, r)
 | 
|  |     84 |   def ~ (r: String) = SEQ(s, r)
 | 
|  |     85 |   def $ (r: Rexp) = RECD(s, r)
 | 
|  |     86 | }
 | 
|  |     87 | 
 | 
|  |     88 | def nullable(r: Rexp) : Boolean = r match {
 | 
|  |     89 |   case ZERO => false
 | 
|  |     90 |   case ONE => true
 | 
|  |     91 |   case CHAR(_) => false
 | 
|  |     92 |   case ANYCHAR => false
 | 
|  |     93 |   case ALTS(r1, r2) => nullable(r1) || nullable(r2)
 | 
|  |     94 |   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
 | 
|  |     95 |   case STAR(_) => true
 | 
|  |     96 |   case RECD(_, r1) => nullable(r1)
 | 
|  |     97 |   case NTIMES(n, r) => if (n == 0) true else nullable(r)
 | 
|  |     98 |   case OPTIONAL(r) => true
 | 
|  |     99 |   case NOT(r) => !nullable(r)
 | 
|  |    100 | }
 | 
|  |    101 | 
 | 
|  |    102 | def der(c: Char, r: Rexp) : Rexp = r match {
 | 
|  |    103 |   case ZERO => ZERO
 | 
|  |    104 |   case ONE => ZERO
 | 
|  |    105 |   case CHAR(d) => if (c == d) ONE else ZERO
 | 
|  |    106 |   case ANYCHAR => ONE 
 | 
|  |    107 |   case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
 | 
|  |    108 |   case SEQ(r1, r2) => 
 | 
|  |    109 |     if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
 | 
|  |    110 |     else SEQ(der(c, r1), r2)
 | 
|  |    111 |   case STAR(r) => SEQ(der(c, r), STAR(r))
 | 
|  |    112 |   case RECD(_, r1) => der(c, r1)
 | 
|  |    113 |   case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
 | 
|  |    114 |   case OPTIONAL(r) => der(c, r)
 | 
|  |    115 |   case NOT(r) =>  NOT(der(c, r))
 | 
|  |    116 | }
 | 
|  |    117 | 
 | 
|  |    118 | 
 | 
|  |    119 | // extracts a string from a value
 | 
|  |    120 | def flatten(v: Val) : String = v match {
 | 
|  |    121 |   case Empty => ""
 | 
|  |    122 |   case Chr(c) => c.toString
 | 
|  |    123 |   case Left(v) => flatten(v)
 | 
|  |    124 |   case Right(v) => flatten(v)
 | 
|  |    125 |   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
 | 
|  |    126 |   case Stars(vs) => vs.map(flatten).mkString
 | 
|  |    127 |   case Ntime(vs) => vs.map(flatten).mkString
 | 
|  |    128 |   case Optionall(v) => flatten(v)
 | 
|  |    129 |   case Rec(_, v) => flatten(v)
 | 
|  |    130 | }
 | 
|  |    131 | 
 | 
|  |    132 | 
 | 
|  |    133 | // extracts an environment from a value;
 | 
|  |    134 | // used for tokenising a string
 | 
|  |    135 | def env(v: Val) : List[(String, String)] = v match {
 | 
|  |    136 |   case Empty => Nil
 | 
|  |    137 |   case Chr(c) => Nil
 | 
|  |    138 |   case Left(v) => env(v)
 | 
|  |    139 |   case Right(v) => env(v)
 | 
|  |    140 |   case Sequ(v1, v2) => env(v1) ::: env(v2)
 | 
|  |    141 |   case Stars(vs) => vs.flatMap(env)
 | 
|  |    142 |   case Ntime(vs) => vs.flatMap(env)
 | 
|  |    143 |   case Rec(x, v) => (x, flatten(v))::env(v)
 | 
|  |    144 |   case Optionall(v) => env(v)
 | 
|  |    145 |   case Nots(s) => ("Negative", s) :: Nil
 | 
|  |    146 | }
 | 
|  |    147 | 
 | 
|  |    148 | 
 | 
|  |    149 | // The injection and mkeps part of the lexer
 | 
|  |    150 | //===========================================
 | 
|  |    151 | 
 | 
|  |    152 | def mkeps(r: Rexp) : Val = r match {
 | 
|  |    153 |   case ONE => Empty
 | 
|  |    154 |   case ALTS(r1, r2) => 
 | 
|  |    155 |     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
 | 
|  |    156 |   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
 | 
|  |    157 |   case STAR(r) => Stars(Nil)
 | 
|  |    158 |   case RECD(x, r) => Rec(x, mkeps(r))
 | 
|  |    159 |   case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
 | 
|  |    160 |   case OPTIONAL(r) => Optionall(Empty)
 | 
|  |    161 |   case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")  
 | 
|  |    162 |                          else Nots("")//Nots(s.reverse.toString)
 | 
|  |    163 | //   case NOT(ZERO) => Empty
 | 
|  |    164 | //   case NOT(CHAR(c)) => Empty
 | 
|  |    165 | //   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
 | 
|  |    166 | //   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
 | 
|  |    167 | //   case NOT(STAR(r)) => Stars(Nil) 
 | 
|  |    168 | 
 | 
|  |    169 | }
 | 
|  |    170 | 
 | 
|  |    171 | def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
 | 
|  |    172 |   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
 | 
|  |    173 |   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
 | 
|  |    174 |   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
 | 
|  |    175 |   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
 | 
|  |    176 |   case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
 | 
|  |    177 |   case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
 | 
|  |    178 |   case (CHAR(d), Empty) => Chr(c) 
 | 
|  |    179 |   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
 | 
|  |    180 |   case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
 | 
|  |    181 |   case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
 | 
|  |    182 |   case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
 | 
|  |    183 |   case (ANYCHAR, Empty) => Chr(c)
 | 
|  |    184 | }
 | 
|  |    185 | 
 | 
|  |    186 | // some "rectification" functions for simplification
 | 
|  |    187 | def F_ID(v: Val): Val = v
 | 
|  |    188 | def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
 | 
|  |    189 | def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
 | 
|  |    190 | def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
|  |    191 |   case Right(v) => Right(f2(v))
 | 
|  |    192 |   case Left(v) => Left(f1(v))
 | 
|  |    193 | }
 | 
|  |    194 | def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
|  |    195 |   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
 | 
|  |    196 | }
 | 
|  |    197 | def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
 | 
|  |    198 |   (v:Val) => Sequ(f1(Empty), f2(v))
 | 
|  |    199 | def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
 | 
|  |    200 |   (v:Val) => Sequ(f1(v), f2(Empty))
 | 
|  |    201 | 
 | 
|  |    202 | def F_ERROR(v: Val): Val = throw new Exception("error")
 | 
|  |    203 | 
 | 
|  |    204 | // simplification
 | 
|  |    205 | def simp(r: Rexp): (Rexp, Val => Val) = r match {
 | 
|  |    206 |   case ALTS(r1, r2) => {
 | 
|  |    207 |     val (r1s, f1s) = simp(r1)
 | 
|  |    208 |     val (r2s, f2s) = simp(r2)
 | 
|  |    209 |     (r1s, r2s) match {
 | 
|  |    210 |       case (ZERO, _) => (r2s, F_RIGHT(f2s))
 | 
|  |    211 |       case (_, ZERO) => (r1s, F_LEFT(f1s))
 | 
|  |    212 |       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
 | 
|  |    213 |                 else (ALTS (r1s, r2s), F_ALT(f1s, f2s)) 
 | 
|  |    214 |     }
 | 
|  |    215 |   }
 | 
|  |    216 |   case SEQ(r1, r2) => {
 | 
|  |    217 |     val (r1s, f1s) = simp(r1)
 | 
|  |    218 |     val (r2s, f2s) = simp(r2)
 | 
|  |    219 |     (r1s, r2s) match {
 | 
|  |    220 |       case (ZERO, _) => (ZERO, F_ERROR)
 | 
|  |    221 |       case (_, ZERO) => (ZERO, F_ERROR)
 | 
|  |    222 |       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
 | 
|  |    223 |       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
 | 
|  |    224 |       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
 | 
|  |    225 |     }
 | 
|  |    226 |   }
 | 
|  |    227 |   case r => (r, F_ID)
 | 
|  |    228 | }
 | 
|  |    229 | 
 | 
|  |    230 | // lexing functions including simplification
 | 
|  |    231 | def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
 | 
|  |    232 |   case Nil => if (nullable(r)) mkeps(r) else 
 | 
|  |    233 |     { throw new Exception(s"lexing error $r not nullable") } 
 | 
|  |    234 |   case c::cs => {
 | 
|  |    235 |     val (r_simp, f_simp) = simp(der(c, r))
 | 
|  |    236 |     inj(r, c, f_simp(lex_simp(r_simp, cs)))
 | 
|  |    237 |   }
 | 
|  |    238 | }
 | 
|  |    239 | 
 | 
|  |    240 | def lexing_simp(r: Rexp, s: String) = 
 | 
|  |    241 |   env(lex_simp(r, s.toList))
 | 
|  |    242 | 
 | 
|  |    243 | // The Lexing Rules for the WHILE Language
 | 
|  |    244 | 
 | 
|  |    245 | def PLUS(r: Rexp) = r ~ r.%
 | 
|  |    246 | 
 | 
|  |    247 | def Range(s : List[Char]) : Rexp = s match {
 | 
|  |    248 |   case Nil => ZERO
 | 
|  |    249 |   case c::Nil => CHAR(c)
 | 
|  |    250 |   case c::s => ALTS(CHAR(c), Range(s))
 | 
|  |    251 | }
 | 
|  |    252 | def RANGE(s: String) = Range(s.toList)
 | 
|  |    253 | 
 | 
|  |    254 | val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
 | 
|  |    255 | val DIGIT = RANGE("0123456789")
 | 
|  |    256 | val ID = SYM ~ (SYM | DIGIT).% 
 | 
|  |    257 | val NUM = PLUS(DIGIT)
 | 
|  |    258 | val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" 
 | 
|  |    259 | val SEMI: Rexp = ";"
 | 
|  |    260 | val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
 | 
|  |    261 | val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
 | 
|  |    262 | val RPAREN: Rexp = "{"
 | 
|  |    263 | val LPAREN: Rexp = "}"
 | 
|  |    264 | val STRING: Rexp = "\"" ~ SYM.% ~ "\""
 | 
|  |    265 | 
 | 
|  |    266 | 
 | 
|  |    267 | //ab \ a --> 1b
 | 
|  |    268 | //
 | 
|  |    269 | val WHILE_REGS = (("k" $ KEYWORD) | 
 | 
|  |    270 |                   ("i" $ ID) | 
 | 
|  |    271 |                   ("o" $ OP) | 
 | 
|  |    272 |                   ("n" $ NUM) | 
 | 
|  |    273 |                   ("s" $ SEMI) | 
 | 
|  |    274 |                   ("str" $ STRING) |
 | 
|  |    275 |                   ("p" $ (LPAREN | RPAREN)) | 
 | 
|  |    276 |                   ("w" $ WHITESPACE)).%
 | 
|  |    277 | 
 | 
|  |    278 | val NREGS = NTIMES(5, OPTIONAL(SYM))
 | 
|  |    279 | val NREGS1 = ("test" $ NREGS)
 | 
|  |    280 | // Two Simple While Tests
 | 
|  |    281 | //========================
 | 
|  |    282 | val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))
 | 
|  |    283 | 
 | 
|  |    284 | 
 | 
|  |    285 |   // bnullable function: tests whether the aregular 
 | 
|  |    286 |   // expression can recognise the empty string
 | 
|  |    287 | def bnullable (r: ARexp) : Boolean = r match {
 | 
|  |    288 |     case AZERO => false
 | 
|  |    289 |     case AONE(_) => true
 | 
|  |    290 |     case ACHAR(_,_) => false
 | 
|  |    291 |     case AALTS(_, rs) => rs.exists(bnullable)
 | 
|  |    292 |     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
 | 
|  |    293 |     case ASTAR(_, _) => true
 | 
|  |    294 |     case ANOT(_, rn) => !bnullable(rn)
 | 
|  |    295 |   }
 | 
|  |    296 | 
 | 
|  |    297 | def mkepsBC(r: ARexp) : Bits = r match {
 | 
|  |    298 |     case AONE(bs) => bs
 | 
|  |    299 |     case AALTS(bs, rs) => {
 | 
|  |    300 |       val n = rs.indexWhere(bnullable)
 | 
|  |    301 |       bs ++ mkepsBC(rs(n))
 | 
|  |    302 |     }
 | 
|  |    303 |     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
 | 
|  |    304 |     case ASTAR(bs, r) => bs ++ List(Z)
 | 
|  |    305 |     case ANOT(bs, rn) => bs
 | 
|  |    306 |   }
 | 
|  |    307 | 
 | 
|  |    308 | 
 | 
|  |    309 | def bder(c: Char, r: ARexp) : ARexp = r match {
 | 
|  |    310 |     case AZERO => AZERO
 | 
|  |    311 |     case AONE(_) => AZERO
 | 
|  |    312 |     case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
 | 
|  |    313 |     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
 | 
|  |    314 |     case ASEQ(bs, r1, r2) => 
 | 
|  |    315 |       if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
 | 
|  |    316 |       else ASEQ(bs, bder(c, r1), r2)
 | 
|  |    317 |     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
 | 
|  |    318 |     case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
 | 
|  |    319 |     case AANYCHAR(bs) => AONE(bs)
 | 
|  |    320 |   } 
 | 
|  |    321 | 
 | 
|  |    322 | def fuse(bs: Bits, r: ARexp) : ARexp = r match {
 | 
|  |    323 |     case AZERO => AZERO
 | 
|  |    324 |     case AONE(cs) => AONE(bs ++ cs)
 | 
|  |    325 |     case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
 | 
|  |    326 |     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
 | 
|  |    327 |     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
 | 
|  |    328 |     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
 | 
|  |    329 |     case ANOT(cs, r) => ANOT(bs ++ cs, r)
 | 
|  |    330 |   }
 | 
|  |    331 | 
 | 
|  |    332 | 
 | 
|  |    333 | def internalise(r: Rexp) : ARexp = r match {
 | 
|  |    334 |     case ZERO => AZERO
 | 
|  |    335 |     case ONE => AONE(Nil)
 | 
|  |    336 |     case CHAR(c) => ACHAR(Nil, c)
 | 
|  |    337 |     //case PRED(f) => APRED(Nil, f)
 | 
|  |    338 |     case ALTS(r1, r2) => 
 | 
|  |    339 |       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
 | 
|  |    340 |     // case ALTS(r1::rs) => {
 | 
|  |    341 |     //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
 | 
|  |    342 |     //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
 | 
|  |    343 |     // }
 | 
|  |    344 |     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
 | 
|  |    345 |     case STAR(r) => ASTAR(Nil, internalise(r))
 | 
|  |    346 |     case RECD(x, r) => internalise(r)
 | 
|  |    347 |     case NOT(r) => ANOT(Nil, internalise(r))
 | 
|  |    348 |     case ANYCHAR => AANYCHAR(Nil)
 | 
|  |    349 |   }
 | 
|  |    350 | 
 | 
|  |    351 | 
 | 
|  |    352 | def bsimp(r: ARexp): ARexp = 
 | 
|  |    353 |   {
 | 
|  |    354 |     r match {
 | 
|  |    355 |       case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
 | 
|  |    356 |           case (AZERO, _) => AZERO
 | 
|  |    357 |           case (_, AZERO) => AZERO
 | 
|  |    358 |           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
 | 
|  |    359 |           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
 | 
|  |    360 |       }
 | 
|  |    361 |       case AALTS(bs1, rs) => {
 | 
|  |    362 |             val rs_simp = rs.map(bsimp(_))
 | 
|  |    363 |             val flat_res = flats(rs_simp)
 | 
| 409 |    364 |             val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
 | 
| 403 |    365 |             dist_res match {
 | 
|  |    366 |               case Nil => AZERO
 | 
|  |    367 |               case s :: Nil => fuse(bs1, s)
 | 
|  |    368 |               case rs => AALTS(bs1, rs)  
 | 
|  |    369 |             }
 | 
|  |    370 |           
 | 
|  |    371 |       }
 | 
|  |    372 |       case r => r
 | 
|  |    373 |     }
 | 
|  |    374 |   }
 | 
| 409 |    375 |   def strongBsimp(r: ARexp): ARexp =
 | 
|  |    376 |   {
 | 
|  |    377 |     r match {
 | 
|  |    378 |       case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
 | 
|  |    379 |           case (AZERO, _) => AZERO
 | 
|  |    380 |           case (_, AZERO) => AZERO
 | 
|  |    381 |           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
 | 
|  |    382 |           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
 | 
|  |    383 |       }
 | 
|  |    384 |       case AALTS(bs1, rs) => {
 | 
|  |    385 |             val rs_simp = rs.map(strongBsimp(_))
 | 
|  |    386 |             val flat_res = flats(rs_simp)
 | 
| 412 |    387 |             val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
 | 
| 409 |    388 |             dist_res match {
 | 
|  |    389 |               case Nil => AZERO
 | 
|  |    390 |               case s :: Nil => fuse(bs1, s)
 | 
|  |    391 |               case rs => AALTS(bs1, rs)  
 | 
|  |    392 |             }
 | 
|  |    393 |           
 | 
|  |    394 |       }
 | 
|  |    395 |       case r => r
 | 
|  |    396 |     }
 | 
|  |    397 |   }
 | 
|  |    398 | 
 | 
| 403 |    399 |   def bders (s: List[Char], r: ARexp) : ARexp = s match {
 | 
|  |    400 |     case Nil => r
 | 
|  |    401 |     case c::s => bders(s, bder(c, r))
 | 
|  |    402 |   }
 | 
|  |    403 | 
 | 
|  |    404 |   def flats(rs: List[ARexp]): List[ARexp] = rs match {
 | 
|  |    405 |       case Nil => Nil
 | 
|  |    406 |       case AZERO :: rs1 => flats(rs1)
 | 
|  |    407 |       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
 | 
|  |    408 |       case r1 :: rs2 => r1 :: flats(rs2)
 | 
|  |    409 |     }
 | 
|  |    410 | 
 | 
|  |    411 |   def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
 | 
|  |    412 |     case Nil => Nil
 | 
|  |    413 |     case (x::xs) => {
 | 
|  |    414 |       val res = f(x)
 | 
|  |    415 |       if (acc.contains(res)) distinctBy(xs, f, acc)  
 | 
|  |    416 |       else x::distinctBy(xs, f, res::acc)
 | 
|  |    417 |     }
 | 
|  |    418 |   } 
 | 
|  |    419 | 
 | 
| 412 |    420 |   def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
 | 
|  |    421 |     case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
 | 
|  |    422 |     case Nil => Nil
 | 
|  |    423 |   }
 | 
|  |    424 | 
 | 
|  |    425 | 
 | 
|  |    426 |   def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
 | 
|  |    427 |     case Nil => Nil
 | 
|  |    428 |     case (x::xs) => {
 | 
|  |    429 |       val res = erase(x)
 | 
|  |    430 |       if(acc.contains(res))
 | 
|  |    431 |         distinctBy2(xs, acc)
 | 
|  |    432 |       else
 | 
|  |    433 |         x match {
 | 
|  |    434 |           case ASEQ(bs0, AALTS(bs1, rs), r2) => 
 | 
|  |    435 |             val newTerms =  distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
 | 
|  |    436 |             val rsPrime = projectFirstChild(newTerms)
 | 
|  |    437 |             newTerms match {
 | 
|  |    438 |               case Nil => distinctBy2(xs, acc)
 | 
|  |    439 |               case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
 | 
|  |    440 |               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
 | 
|  |    441 |             }
 | 
|  |    442 |           case x => x::distinctBy2(xs, res::acc)
 | 
|  |    443 |         }
 | 
|  |    444 |     }
 | 
| 414 |    445 |   }
 | 
|  |    446 | 
 | 
|  |    447 | 
 | 
|  |    448 | 
 | 
|  |    449 |   def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
 | 
|  |    450 |     case Nil => Nil
 | 
|  |    451 |     case (x::xs) => {
 | 
|  |    452 |       val res = erase(x)
 | 
|  |    453 |       if(acc.contains(res))
 | 
|  |    454 |         distinctBy3(xs, acc)
 | 
|  |    455 |       else
 | 
|  |    456 |         x match {
 | 
|  |    457 |           case ASEQ(bs0, AALTS(bs1, rs), r2) => 
 | 
|  |    458 |             val newTerms =  distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
 | 
|  |    459 |             val rsPrime = projectFirstChild(newTerms)
 | 
|  |    460 |             newTerms match {
 | 
|  |    461 |               case Nil => distinctBy3(xs, acc)
 | 
|  |    462 |               case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
 | 
|  |    463 |               case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
 | 
|  |    464 |             }
 | 
|  |    465 |           case x => x::distinctBy3(xs, res::acc)
 | 
|  |    466 |         }
 | 
|  |    467 |     }
 | 
|  |    468 |   }
 | 
|  |    469 | 
 | 
|  |    470 |   def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
 | 
|  |    471 |     case SEQ(r1, r2)  => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
 | 
|  |    472 |     case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
 | 
|  |    473 |     case _ => r::Nil
 | 
| 412 |    474 |   } 
 | 
|  |    475 | 
 | 
| 403 |    476 |   def prettyRexp(r: Rexp) : String = r match {
 | 
|  |    477 |       case STAR(r0) => s"${prettyRexp(r0)}*"
 | 
|  |    478 |       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
 | 
|  |    479 |       case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
 | 
|  |    480 |       case CHAR(c) => c.toString
 | 
|  |    481 |       case ANYCHAR => "."
 | 
|  |    482 |     //   case NOT(r0) => s
 | 
|  |    483 |   }
 | 
|  |    484 | 
 | 
|  |    485 |   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
 | 
|  |    486 |     case (ONE, bs) => (Empty, bs)
 | 
|  |    487 |     case (CHAR(f), bs) => (Chr(f), bs)
 | 
|  |    488 |     case (ALTS(r1, r2), Z::bs1) => {
 | 
|  |    489 |         val (v, bs2) = decode_aux(r1, bs1)
 | 
|  |    490 |         (Left(v), bs2)
 | 
|  |    491 |     }
 | 
|  |    492 |     case (ALTS(r1, r2), S::bs1) => {
 | 
|  |    493 |         val (v, bs2) = decode_aux(r2, bs1)
 | 
|  |    494 |         (Right(v), bs2)
 | 
|  |    495 |     }
 | 
|  |    496 |     case (SEQ(r1, r2), bs) => {
 | 
|  |    497 |       val (v1, bs1) = decode_aux(r1, bs)
 | 
|  |    498 |       val (v2, bs2) = decode_aux(r2, bs1)
 | 
|  |    499 |       (Sequ(v1, v2), bs2)
 | 
|  |    500 |     }
 | 
|  |    501 |     case (STAR(r1), S::bs) => {
 | 
|  |    502 |       val (v, bs1) = decode_aux(r1, bs)
 | 
|  |    503 |       //println(v)
 | 
|  |    504 |       val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
 | 
|  |    505 |       (Stars(v::vs), bs2)
 | 
|  |    506 |     }
 | 
|  |    507 |     case (STAR(_), Z::bs) => (Stars(Nil), bs)
 | 
|  |    508 |     case (RECD(x, r1), bs) => {
 | 
|  |    509 |       val (v, bs1) = decode_aux(r1, bs)
 | 
|  |    510 |       (Rec(x, v), bs1)
 | 
|  |    511 |     }
 | 
|  |    512 |     case (NOT(r), bs) => (Nots(prettyRexp(r)), bs)
 | 
|  |    513 |   }
 | 
|  |    514 | 
 | 
|  |    515 |   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
 | 
|  |    516 |     case (v, Nil) => v
 | 
|  |    517 |     case _ => throw new Exception("Not decodable")
 | 
|  |    518 |   }
 | 
|  |    519 | 
 | 
|  |    520 | 
 | 
|  |    521 | 
 | 
|  |    522 | def blexSimp(r: Rexp, s: String) : List[Bit] = {
 | 
|  |    523 |     blex_simp(internalise(r), s.toList)
 | 
|  |    524 | }
 | 
|  |    525 | 
 | 
|  |    526 | def blexing_simp(r: Rexp, s: String) : Val = {
 | 
|  |    527 |     val bit_code = blex_simp(internalise(r), s.toList)
 | 
|  |    528 |     decode(r, bit_code)
 | 
|  |    529 |   }
 | 
|  |    530 | 
 | 
| 414 |    531 |   def strong_blexing_simp(r: Rexp, s: String) : Val = {
 | 
|  |    532 |     decode(r, strong_blex_simp(internalise(r), s.toList))
 | 
|  |    533 |   }
 | 
|  |    534 | 
 | 
|  |    535 |   def strong_blex_simp(r: ARexp, s: List[Char]) :Bits = s match {
 | 
|  |    536 |     case Nil => {
 | 
|  |    537 |       if (bnullable(r)) {
 | 
|  |    538 |         //println(asize(r))
 | 
|  |    539 |         val bits = mkepsBC(r)
 | 
|  |    540 |         bits
 | 
|  |    541 |       }
 | 
|  |    542 |       else 
 | 
|  |    543 |         throw new Exception("Not matched")
 | 
|  |    544 |     }
 | 
|  |    545 |     case c::cs => {
 | 
|  |    546 |       val der_res = bder(c,r)
 | 
|  |    547 |       val simp_res = strongBsimp(der_res)  
 | 
|  |    548 |       strong_blex_simp(simp_res, cs)      
 | 
|  |    549 |     }
 | 
|  |    550 |   }
 | 
|  |    551 | 
 | 
| 403 |    552 | 
 | 
|  |    553 | 
 | 
|  |    554 |   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
 | 
|  |    555 |     case Nil => r
 | 
|  |    556 |     case c::s => bders_simp(s, bsimp(bder(c, r)))
 | 
|  |    557 |   }
 | 
|  |    558 |   
 | 
|  |    559 |   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
 | 
|  |    560 | 
 | 
| 412 |    561 |   def bders_simpS(s: List[Char], r: ARexp) : ARexp = s match {
 | 
|  |    562 |     case Nil => r 
 | 
|  |    563 |     case c::s => bders_simpS(s, strongBsimp(bder(c, r)))
 | 
|  |    564 |   }
 | 
|  |    565 | 
 | 
|  |    566 |   def bdersSimpS(s: String, r: Rexp) : ARexp = bders_simpS(s.toList, internalise(r))
 | 
| 403 |    567 | 
 | 
|  |    568 |   def erase(r:ARexp): Rexp = r match{
 | 
|  |    569 |     case AZERO => ZERO
 | 
|  |    570 |     case AONE(_) => ONE
 | 
|  |    571 |     case ACHAR(bs, c) => CHAR(c)
 | 
|  |    572 |     case AALTS(bs, Nil) => ZERO
 | 
|  |    573 |     case AALTS(bs, a::Nil) => erase(a)
 | 
|  |    574 |     case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
 | 
|  |    575 |     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
 | 
|  |    576 |     case ASTAR(cs, r)=> STAR(erase(r))
 | 
|  |    577 |     case ANOT(bs, r) => NOT(erase(r))
 | 
|  |    578 |     case AANYCHAR(bs) => ANYCHAR
 | 
|  |    579 |   }
 | 
|  |    580 | 
 | 
|  |    581 | def breakHead(r: ARexp) : List[ARexp] = r match {
 | 
|  |    582 |     case AALTS(bs, rs) => rs
 | 
|  |    583 |     case r => r::Nil
 | 
|  |    584 | }
 | 
|  |    585 | 
 | 
|  |    586 | def distinctByWithAcc[B, C](xs: List[B], f: B => C, 
 | 
|  |    587 |     acc: List[C] = Nil, accB: List[B] = Nil): (List[B], List[C]) = xs match {
 | 
|  |    588 |     case Nil => (accB.reverse, acc)
 | 
|  |    589 |     case (x::xs) => {
 | 
|  |    590 |       val res = f(x)
 | 
|  |    591 |       if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)  
 | 
|  |    592 |       else distinctByWithAcc(xs, f, res::acc, x::accB)
 | 
|  |    593 |     }
 | 
|  |    594 |   } 
 | 
|  |    595 | 
 | 
| 412 |    596 | //(r1+r2)r + (r2'+r3)r' ~> (r1+r2)r + (r3)r' (erase r = erase r') (erase r2 = erase r2')
 | 
|  |    597 | //s = s1@s2 \in L R  s1 \in L r2 s2 \in L r
 | 
|  |    598 | //s = s3@s4  s3 \in L r1 s4 \in L r |s3| < |s1| \nexists s3' s4' s.t. s3'@s4' = s and s3' \in L r1 s4' \in L r
 | 
|  |    599 | //(r1+r4)r ~> r1r +_usedToBeSeq r4r 
 | 
|  |    600 | // | ss7:  "erase a01 = erase a02 ∧ (distinctBy as2 erase (set (map erase as1)) = as2p)  ⟹ 
 | 
|  |    601 | //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc)  s↝
 | 
|  |    602 | //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
 | 
|  |    603 | // | ss8:  "erase a01 = erase a02 ∧ (distinctBy [a2] erase (set (map erase as1)) = [])  ⟹ 
 | 
|  |    604 | //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs a2 (ASTAR bs02 a02)]@rsc) s↝                                                
 | 
|  |    605 | //         (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@rsc)"
 | 
|  |    606 | // | ss9:  "erase a01 = erase a02 ∧ (distinctBy as2 erase {erase a1} = as2p)  ⟹ 
 | 
|  |    607 | //         (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) s↝ 
 | 
|  |    608 | //         (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
 | 
| 403 |    609 | 
 | 
| 412 |    610 | 
 | 
|  |    611 | //# of top-level terms
 | 
|  |    612 | //      r1r2 -> r11r2 + r12 -> r21r2 + r22 + r12' -> 4 terms -> 5 terms -> 6..........
 | 
|  |    613 | // if string long enough --> #| r1r2 \s s | > unbounded? No ->  #| r1r2 \s s | <  #| pders UNIV r1| + #|pders UNIV r2| <= awidth r1r2
 | 
|  |    614 | //r* -> r1r* -> r21r* + r22r* -> 4 terms -> 8 terms..............
 | 
| 409 |    615 |   def strongDB(xs: List[ARexp], 
 | 
| 403 |    616 |                        acc1: List[Rexp] = Nil, 
 | 
|  |    617 |                        acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
 | 
|  |    618 |     case Nil => Nil
 | 
|  |    619 |     case (x::xs) => 
 | 
|  |    620 |         if(acc1.contains(erase(x)))
 | 
| 409 |    621 |             strongDB(xs, acc1, acc2)
 | 
| 403 |    622 |         else{
 | 
|  |    623 |             x match {
 | 
|  |    624 |                 case ASTAR(bs0, r0) => 
 | 
|  |    625 |                     val headList : List[ARexp] = List[ARexp](AONE(Nil))
 | 
|  |    626 |                     val i = acc2.indexWhere(
 | 
|  |    627 |                         r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
 | 
|  |    628 |                     )
 | 
|  |    629 |                     if(i == -1){ 
 | 
| 409 |    630 |                         x::strongDB(
 | 
| 403 |    631 |                             xs, erase(x)::acc1, (ONE::Nil, erase(r0))::acc2
 | 
|  |    632 |                         )
 | 
|  |    633 |                     }
 | 
|  |    634 |                     else{
 | 
|  |    635 |                         val headListAlready = acc2(i)
 | 
|  |    636 |                         val (newHeads, oldHeadsUpdated) = 
 | 
|  |    637 |                                 distinctByWithAcc(headList, erase, headListAlready._1)
 | 
|  |    638 |                         newHeads match{
 | 
|  |    639 |                             case newHead::Nil =>
 | 
|  |    640 |                                 ASTAR(bs0, r0) :: 
 | 
| 409 |    641 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 412 |    642 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )
 | 
| 403 |    643 |                             case Nil =>
 | 
| 409 |    644 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    645 |                                 acc2)
 | 
|  |    646 |                         }
 | 
|  |    647 |                     }                
 | 
|  |    648 |                 case ASEQ(bs, r1, ASTAR(bs0, r0)) => 
 | 
|  |    649 |                     val headList = breakHead(r1)
 | 
|  |    650 |                     val i = acc2.indexWhere(
 | 
|  |    651 |                         r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
 | 
|  |    652 |                     )
 | 
|  |    653 |                     if(i == -1){ 
 | 
| 409 |    654 |                         x::strongDB(
 | 
| 403 |    655 |                             xs, erase(x)::acc1, (headList.map(erase(_)), erase(r0))::acc2
 | 
|  |    656 |                         )
 | 
|  |    657 |                     }
 | 
|  |    658 |                     else{
 | 
|  |    659 |                         val headListAlready = acc2(i)
 | 
|  |    660 |                         val (newHeads, oldHeadsUpdated) = 
 | 
|  |    661 |                                 distinctByWithAcc(headList, erase, headListAlready._1)
 | 
|  |    662 |                         newHeads match{
 | 
|  |    663 |                             case newHead::Nil =>
 | 
|  |    664 |                                 ASEQ(bs, newHead, ASTAR(bs0, r0)) :: 
 | 
| 409 |    665 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    666 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
 | 
|  |    667 |                             case Nil =>
 | 
| 409 |    668 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    669 |                                 acc2)
 | 
|  |    670 |                             case hds => val AALTS(bsp, rsp) = r1
 | 
|  |    671 |                                 ASEQ(bs, AALTS(bsp, hds), ASTAR(bs0, r0)) ::
 | 
| 409 |    672 |                                 strongDB(xs, erase(x)::acc1,
 | 
| 403 |    673 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)))
 | 
|  |    674 |                         }
 | 
|  |    675 |                     }
 | 
| 409 |    676 |                 case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)    
 | 
| 403 |    677 |             }
 | 
|  |    678 |                 
 | 
|  |    679 |         }
 | 
| 414 |    680 |   
 | 
| 403 |    681 | }
 | 
|  |    682 | 
 | 
| 414 |    683 | def allCharSeq(r: Rexp) : Boolean = r match {
 | 
|  |    684 |   case CHAR(c) => true
 | 
|  |    685 |   case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
 | 
|  |    686 |   case _ => false
 | 
|  |    687 | }
 | 
|  |    688 | 
 | 
|  |    689 | def flattenSeq(r: Rexp) : String = r match {
 | 
|  |    690 |   case CHAR(c) => c.toString
 | 
|  |    691 |   case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
 | 
|  |    692 |   case _ => throw new Error("flatten unflattenable rexp")
 | 
|  |    693 | } 
 | 
|  |    694 | 
 | 
| 412 |    695 | def shortRexpOutput(r: Rexp) : String = r match {
 | 
|  |    696 |     case CHAR(c) => c.toString
 | 
|  |    697 |     case ONE => "1"
 | 
|  |    698 |     case ZERO => "0"
 | 
| 414 |    699 |     case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
 | 
| 412 |    700 |     case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
 | 
|  |    701 |     case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
 | 
|  |    702 |     case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
 | 
|  |    703 |     //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
 | 
|  |    704 |     //case RTOP => "RTOP"
 | 
|  |    705 |   }
 | 
|  |    706 | 
 | 
| 403 |    707 | 
 | 
|  |    708 | def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
 | 
|  |    709 |     case Nil => {
 | 
|  |    710 |       if (bnullable(r)) {
 | 
|  |    711 |         //println(asize(r))
 | 
|  |    712 |         val bits = mkepsBC(r)
 | 
|  |    713 |         bits
 | 
|  |    714 |       }
 | 
| 414 |    715 |       else 
 | 
|  |    716 |         throw new Exception("Not matched")
 | 
| 403 |    717 |     }
 | 
|  |    718 |     case c::cs => {
 | 
|  |    719 |       val der_res = bder(c,r)
 | 
|  |    720 |       val simp_res = bsimp(der_res)  
 | 
|  |    721 |       blex_simp(simp_res, cs)      
 | 
|  |    722 |     }
 | 
|  |    723 |   }
 | 
|  |    724 |   def size(r: Rexp) : Int = r match {
 | 
|  |    725 |     case ZERO => 1
 | 
|  |    726 |     case ONE => 1
 | 
|  |    727 |     case CHAR(_) => 1
 | 
|  |    728 |     case ANYCHAR => 1
 | 
|  |    729 |     case NOT(r0) => 1 + size(r0)
 | 
|  |    730 |     case SEQ(r1, r2) => 1 + size(r1) + size(r2)
 | 
|  |    731 |     case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
 | 
|  |    732 |     case STAR(r) => 1 + size(r)
 | 
|  |    733 |   }
 | 
|  |    734 | 
 | 
|  |    735 |   def asize(a: ARexp) = size(erase(a))
 | 
|  |    736 | 
 | 
| 412 |    737 | //pder related
 | 
|  |    738 | type Mon = (Char, Rexp)
 | 
|  |    739 | type Lin = Set[Mon]
 | 
|  |    740 | 
 | 
|  |    741 | def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
 | 
|  |    742 |     case ZERO => Set()
 | 
|  |    743 |     case ONE => rs
 | 
|  |    744 |     case r => rs.map((re) => if (re == ONE) r else SEQ(re, r)  )   
 | 
|  |    745 |   }
 | 
|  |    746 |   def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
 | 
|  |    747 |     case ZERO => Set()
 | 
|  |    748 |     case ONE => l
 | 
|  |    749 |     case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) }  )
 | 
|  |    750 |   }
 | 
|  |    751 |   def lf(r: Rexp): Lin = r match {
 | 
|  |    752 |     case ZERO => Set()
 | 
|  |    753 |     case ONE => Set()
 | 
|  |    754 |     case CHAR(f) => {
 | 
|  |    755 |       //val Some(c) = alphabet.find(f) 
 | 
|  |    756 |       Set((f, ONE))
 | 
|  |    757 |     }
 | 
|  |    758 |     case ALTS(r1, r2) => {
 | 
|  |    759 |       lf(r1 ) ++ lf(r2)
 | 
|  |    760 |     }
 | 
|  |    761 |     case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
 | 
|  |    762 |     case SEQ(r1, r2) =>{
 | 
|  |    763 |       if (nullable(r1))
 | 
|  |    764 |         cir_prod(lf(r1), r2) ++ lf(r2)
 | 
|  |    765 |       else
 | 
|  |    766 |         cir_prod(lf(r1), r2) 
 | 
|  |    767 |     }
 | 
|  |    768 |   }
 | 
|  |    769 |   def lfs(r: Set[Rexp]): Lin = {
 | 
|  |    770 |     r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
 | 
|  |    771 |   }
 | 
|  |    772 | 
 | 
|  |    773 |   def pder(x: Char, t: Rexp): Set[Rexp] = {
 | 
|  |    774 |     val lft = lf(t)
 | 
|  |    775 |     (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
 | 
|  |    776 |   }
 | 
|  |    777 |   def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
 | 
|  |    778 |     case x::xs => pders(xs, pder(x, t))
 | 
|  |    779 |     case Nil => Set(t)
 | 
|  |    780 |   }
 | 
|  |    781 |   def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
 | 
|  |    782 |     case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
 | 
|  |    783 |     case Nil => ts 
 | 
|  |    784 |   }
 | 
|  |    785 |   def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
 | 
|  |    786 |   def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
 | 
|  |    787 |   //all implementation of partial derivatives that involve set union are potentially buggy
 | 
|  |    788 |   //because they don't include the original regular term before they are pdered.
 | 
|  |    789 |   //now only pderas is fixed.
 | 
|  |    790 |   def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
 | 
| 414 |    791 |   def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
 | 
| 412 |    792 |   def awidth(r: Rexp) : Int = r match {
 | 
|  |    793 |     case CHAR(c) => 1
 | 
|  |    794 |     case SEQ(r1, r2) => awidth(r1) + awidth(r2)
 | 
|  |    795 |     case ALTS(r1, r2) => awidth(r1) + awidth(r2)
 | 
|  |    796 |     case ONE => 0
 | 
|  |    797 |     case ZERO => 0
 | 
|  |    798 |     case STAR(r) => awidth(r)
 | 
|  |    799 |   }
 | 
|  |    800 |   //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
 | 
|  |    801 |   //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
 | 
|  |    802 |   def o(r: Rexp) = if (nullable(r)) ONE else ZERO
 | 
|  |    803 |   //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
 | 
|  |    804 |   def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
 | 
|  |    805 |     case ZERO => Set[Rexp]()
 | 
|  |    806 |     case ONE => Set[Rexp]()
 | 
|  |    807 |     case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
 | 
|  |    808 |     case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
 | 
|  |    809 |     case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
 | 
|  |    810 |     case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
 | 
|  |    811 |   }
 | 
|  |    812 |   def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
 | 
|  |    813 |     case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
 | 
|  |    814 |     case Nil => ts   
 | 
|  |    815 |   }
 | 
|  |    816 |   def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
 | 
|  |    817 | 
 | 
|  |    818 | 
 | 
| 403 |    819 | 
 | 
|  |    820 | // @arg(doc = "small tests")
 | 
| 414 |    821 | val STARREG = (((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%))
 | 
|  |    822 | //(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%
 | 
| 403 |    823 | 
 | 
| 414 |    824 | // @main
 | 
| 403 |    825 | def small() = {
 | 
|  |    826 | 
 | 
| 412 |    827 | 
 | 
| 403 |    828 | //   println(lexing_simp(NOTREG, prog0))
 | 
|  |    829 | //   val v = lex_simp(NOTREG, prog0.toList)
 | 
|  |    830 | //   println(v)
 | 
|  |    831 | 
 | 
|  |    832 | //   val d =  (lex_simp(NOTREG, prog0.toList))
 | 
|  |    833 | //   println(d)
 | 
| 412 |    834 |   val pderSTAR = pderUNIV(STARREG)
 | 
| 414 |    835 | 
 | 
| 412 |    836 |   val refSize = pderSTAR.map(size(_)).sum
 | 
| 414 |    837 |   println("different partial derivative terms:")
 | 
|  |    838 |   pderSTAR.foreach(r => r match {
 | 
|  |    839 |       
 | 
|  |    840 |         case SEQ(head, rstar) =>
 | 
|  |    841 |           println(shortRexpOutput(head) ++ "~STARREG")
 | 
|  |    842 |         case STAR(rstar) =>
 | 
|  |    843 |           println("STARREG")
 | 
|  |    844 |       
 | 
|  |    845 |     }
 | 
|  |    846 |     )
 | 
|  |    847 |   println("the total number of terms is")
 | 
|  |    848 |   //println(refSize)
 | 
|  |    849 |   println(pderSTAR.size)
 | 
|  |    850 |   for(i <- List(1, 10, 100, 400, 800, 840, 900) ){
 | 
| 412 |    851 |     val prog0 = "a" * i
 | 
| 414 |    852 |     //println(s"test: $prog0")
 | 
|  |    853 |     println(s"testing with $i a's" )
 | 
| 412 |    854 |     val bd = bdersSimp(prog0, STARREG)//DB
 | 
|  |    855 |     val sbd = bdersSimpS(prog0, STARREG)//strongDB
 | 
| 414 |    856 |     val subTerms = breakIntoTerms(erase(sbd))
 | 
|  |    857 |     val subTermsLarge = breakIntoTerms(erase(bd))
 | 
|  |    858 |     
 | 
|  |    859 |     println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
 | 
|  |    860 |     println("the number of distinct subterms for bsimp with strongDB and standardDB")
 | 
|  |    861 |     println(subTerms.distinct.size)
 | 
|  |    862 |     println(subTermsLarge.distinct.size)
 | 
| 403 |    863 | 
 | 
| 412 |    864 | 
 | 
|  |    865 |     // println(shortRexpOutput(erase(sbd)))
 | 
|  |    866 |     // println(shortRexpOutput(erase(bd)))
 | 
| 414 |    867 |     
 | 
|  |    868 |     println("pdersize, original, strongSimp")
 | 
|  |    869 |     println(refSize, size(STARREG),  asize(sbd), asize(bd))
 | 
| 403 |    870 | 
 | 
| 414 |    871 |     // val vres = strong_blexing_simp( STARREG, prog0)
 | 
|  |    872 |     // println(vres)
 | 
| 412 |    873 |   }
 | 
| 403 |    874 | //   println(vs.length)
 | 
|  |    875 | //   println(vs)
 | 
|  |    876 |    
 | 
|  |    877 | 
 | 
|  |    878 |   // val prog1 = """read  n; write n"""  
 | 
|  |    879 |   // println(s"test: $prog1")
 | 
|  |    880 |   // println(lexing_simp(WHILE_REGS, prog1))
 | 
|  |    881 | }
 | 
|  |    882 | 
 | 
| 414 |    883 | small()
 | 
|  |    884 | 
 | 
| 403 |    885 | // // Bigger Tests
 | 
|  |    886 | // //==============
 | 
|  |    887 | 
 | 
|  |    888 | // // escapes strings and prints them out as "", "\n" and so on
 | 
|  |    889 | // def esc(raw: String): String = {
 | 
|  |    890 | //   import scala.reflect.runtime.universe._
 | 
|  |    891 | //   Literal(Constant(raw)).toString
 | 
|  |    892 | // }
 | 
|  |    893 | 
 | 
|  |    894 | // def escape(tks: List[(String, String)]) =
 | 
|  |    895 | //   tks.map{ case (s1, s2) => (s1, esc(s2))}
 | 
|  |    896 | 
 | 
|  |    897 | 
 | 
|  |    898 | // val prog2 = """
 | 
|  |    899 | // write "Fib";
 | 
|  |    900 | // read n;
 | 
|  |    901 | // minus1 := 0;
 | 
|  |    902 | // minus2 := 1;
 | 
|  |    903 | // while n > 0 do {
 | 
|  |    904 | //   temp := minus2;
 | 
|  |    905 | //   minus2 := minus1 + minus2;
 | 
|  |    906 | //   minus1 := temp;
 | 
|  |    907 | //   n := n - 1
 | 
|  |    908 | // };
 | 
|  |    909 | // write "Result";
 | 
|  |    910 | // write minus2
 | 
|  |    911 | // """
 | 
|  |    912 | 
 | 
|  |    913 | // @arg(doc = "Fibonacci test")
 | 
|  |    914 | // @main
 | 
|  |    915 | // def fib() = {
 | 
|  |    916 | //   println("lexing fib program")
 | 
|  |    917 | //   println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
 | 
|  |    918 | // }
 | 
|  |    919 | 
 | 
|  |    920 | 
 | 
|  |    921 | // val prog3 = """
 | 
|  |    922 | // start := 1000;
 | 
|  |    923 | // x := start;
 | 
|  |    924 | // y := start;
 | 
|  |    925 | // z := start;
 | 
|  |    926 | // while 0 < x do {
 | 
|  |    927 | //  while 0 < y do {
 | 
|  |    928 | //   while 0 < z do {
 | 
|  |    929 | //     z := z - 1
 | 
|  |    930 | //   };
 | 
|  |    931 | //   z := start;
 | 
|  |    932 | //   y := y - 1
 | 
|  |    933 | //  };     
 | 
|  |    934 | //  y := start;
 | 
|  |    935 | //  x := x - 1
 | 
|  |    936 | // }
 | 
|  |    937 | // """
 | 
|  |    938 | 
 | 
|  |    939 | // @arg(doc = "Loops test")
 | 
|  |    940 | // @main
 | 
|  |    941 | // def loops() = {
 | 
|  |    942 | //   println("lexing Loops")
 | 
|  |    943 | //   println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
 | 
|  |    944 | // }
 | 
|  |    945 | 
 | 
|  |    946 | // @arg(doc = "Email Test")
 | 
|  |    947 | // @main
 | 
|  |    948 | // def email() = {
 | 
|  |    949 | //   val lower = "abcdefghijklmnopqrstuvwxyz"
 | 
|  |    950 | 
 | 
|  |    951 | //   val NAME = RECD("name", PLUS(RANGE(lower ++ "_.-")))
 | 
|  |    952 | //   val DOMAIN = RECD("domain", PLUS(RANGE(lower ++ "-")))
 | 
|  |    953 | //   val RE = RANGE(lower ++ ".")
 | 
|  |    954 | //   val TOPLEVEL = RECD("top", (RE ~ RE) |
 | 
|  |    955 | //                              (RE ~ RE ~ RE) | 
 | 
|  |    956 | //                              (RE ~ RE ~ RE ~ RE) | 
 | 
|  |    957 | //                              (RE ~ RE ~ RE ~ RE ~ RE) |
 | 
|  |    958 | //                              (RE ~ RE ~ RE ~ RE ~ RE ~ RE))
 | 
|  |    959 | 
 | 
|  |    960 | //   val EMAIL = NAME ~ "@" ~ DOMAIN ~ "." ~ TOPLEVEL
 | 
|  |    961 | 
 | 
|  |    962 | //   println(lexing_simp(EMAIL, "christian.urban@kcl.ac.uk"))
 | 
|  |    963 | // }
 | 
|  |    964 | 
 | 
|  |    965 | 
 | 
|  |    966 | // @arg(doc = "All tests.")
 | 
|  |    967 | // @main
 | 
|  |    968 | // def all() = { small(); fib() ; loops() ; email() } 
 | 
|  |    969 | 
 | 
|  |    970 | 
 | 
|  |    971 | 
 | 
|  |    972 | 
 | 
|  |    973 | // runs with amm2 and amm3
 | 
|  |    974 | 
 | 
|  |    975 | 
 | 
|  |    976 | 
 |