| 403 |      1 | // A simple lexer inspired by work of Sulzmann & Lu
 | 
|  |      2 | //==================================================
 | 
|  |      3 | //
 | 
|  |      4 | // Call the test cases with 
 | 
|  |      5 | //
 | 
|  |      6 | //   amm lexer.sc small
 | 
|  |      7 | //   amm lexer.sc fib
 | 
|  |      8 | //   amm lexer.sc loops
 | 
|  |      9 | //   amm lexer.sc email
 | 
|  |     10 | //
 | 
|  |     11 | //   amm lexer.sc all
 | 
|  |     12 | 
 | 
|  |     13 | 
 | 
|  |     14 | // regular expressions including records
 | 
|  |     15 | abstract class Rexp 
 | 
|  |     16 | case object ZERO extends Rexp
 | 
|  |     17 | case object ONE extends Rexp
 | 
|  |     18 | case object ANYCHAR extends Rexp
 | 
|  |     19 | case class CHAR(c: Char) extends Rexp
 | 
|  |     20 | case class ALTS(r1: Rexp, r2: Rexp) extends Rexp 
 | 
|  |     21 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
 | 
|  |     22 | case class STAR(r: Rexp) extends Rexp 
 | 
|  |     23 | case class RECD(x: String, r: Rexp) extends Rexp  
 | 
|  |     24 | case class NTIMES(n: Int, r: Rexp) extends Rexp
 | 
|  |     25 | case class OPTIONAL(r: Rexp) extends Rexp
 | 
|  |     26 | case class NOT(r: Rexp) extends Rexp
 | 
|  |     27 |                 // records for extracting strings or tokens
 | 
|  |     28 |   
 | 
|  |     29 | // values  
 | 
|  |     30 | abstract class Val
 | 
|  |     31 | case object Empty extends Val
 | 
|  |     32 | case class Chr(c: Char) extends Val
 | 
|  |     33 | case class Sequ(v1: Val, v2: Val) extends Val
 | 
|  |     34 | case class Left(v: Val) extends Val
 | 
|  |     35 | case class Right(v: Val) extends Val
 | 
|  |     36 | case class Stars(vs: List[Val]) extends Val
 | 
|  |     37 | case class Rec(x: String, v: Val) extends Val
 | 
|  |     38 | case class Ntime(vs: List[Val]) extends Val
 | 
|  |     39 | case class Optionall(v: Val) extends Val
 | 
|  |     40 | case class Nots(s: String) extends Val
 | 
|  |     41 | 
 | 
|  |     42 | 
 | 
|  |     43 | 
 | 
|  |     44 | abstract class Bit
 | 
|  |     45 | case object Z extends Bit
 | 
|  |     46 | case object S extends Bit
 | 
|  |     47 | 
 | 
|  |     48 | 
 | 
|  |     49 | type Bits = List[Bit]
 | 
|  |     50 | 
 | 
|  |     51 | abstract class ARexp 
 | 
|  |     52 | case object AZERO extends ARexp
 | 
|  |     53 | case class AONE(bs: Bits) extends ARexp
 | 
|  |     54 | case class ACHAR(bs: Bits, c: Char) extends ARexp
 | 
|  |     55 | case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
 | 
|  |     56 | case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
 | 
|  |     57 | case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
 | 
|  |     58 | case class ANOT(bs: Bits, r: ARexp) extends ARexp
 | 
|  |     59 | case class AANYCHAR(bs: Bits) extends ARexp
 | 
|  |     60 | 
 | 
|  |     61 | 
 | 
|  |     62 |    
 | 
|  |     63 | // some convenience for typing in regular expressions
 | 
|  |     64 | 
 | 
|  |     65 | def charlist2rexp(s : List[Char]): Rexp = s match {
 | 
|  |     66 |   case Nil => ONE
 | 
|  |     67 |   case c::Nil => CHAR(c)
 | 
|  |     68 |   case c::s => SEQ(CHAR(c), charlist2rexp(s))
 | 
|  |     69 | }
 | 
|  |     70 | implicit def string2rexp(s : String) : Rexp = 
 | 
|  |     71 |   charlist2rexp(s.toList)
 | 
|  |     72 | 
 | 
|  |     73 | implicit def RexpOps(r: Rexp) = new {
 | 
|  |     74 |   def | (s: Rexp) = ALTS(r, s)
 | 
|  |     75 |   def % = STAR(r)
 | 
|  |     76 |   def ~ (s: Rexp) = SEQ(r, s)
 | 
|  |     77 | }
 | 
|  |     78 | 
 | 
|  |     79 | implicit def stringOps(s: String) = new {
 | 
|  |     80 |   def | (r: Rexp) = ALTS(s, r)
 | 
|  |     81 |   def | (r: String) = ALTS(s, r)
 | 
|  |     82 |   def % = STAR(s)
 | 
|  |     83 |   def ~ (r: Rexp) = SEQ(s, r)
 | 
|  |     84 |   def ~ (r: String) = SEQ(s, r)
 | 
|  |     85 |   def $ (r: Rexp) = RECD(s, r)
 | 
|  |     86 | }
 | 
|  |     87 | 
 | 
|  |     88 | def nullable(r: Rexp) : Boolean = r match {
 | 
|  |     89 |   case ZERO => false
 | 
|  |     90 |   case ONE => true
 | 
|  |     91 |   case CHAR(_) => false
 | 
|  |     92 |   case ANYCHAR => false
 | 
|  |     93 |   case ALTS(r1, r2) => nullable(r1) || nullable(r2)
 | 
|  |     94 |   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
 | 
|  |     95 |   case STAR(_) => true
 | 
|  |     96 |   case RECD(_, r1) => nullable(r1)
 | 
|  |     97 |   case NTIMES(n, r) => if (n == 0) true else nullable(r)
 | 
|  |     98 |   case OPTIONAL(r) => true
 | 
|  |     99 |   case NOT(r) => !nullable(r)
 | 
|  |    100 | }
 | 
|  |    101 | 
 | 
|  |    102 | def der(c: Char, r: Rexp) : Rexp = r match {
 | 
|  |    103 |   case ZERO => ZERO
 | 
|  |    104 |   case ONE => ZERO
 | 
|  |    105 |   case CHAR(d) => if (c == d) ONE else ZERO
 | 
|  |    106 |   case ANYCHAR => ONE 
 | 
|  |    107 |   case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
 | 
|  |    108 |   case SEQ(r1, r2) => 
 | 
|  |    109 |     if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
 | 
|  |    110 |     else SEQ(der(c, r1), r2)
 | 
|  |    111 |   case STAR(r) => SEQ(der(c, r), STAR(r))
 | 
|  |    112 |   case RECD(_, r1) => der(c, r1)
 | 
|  |    113 |   case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
 | 
|  |    114 |   case OPTIONAL(r) => der(c, r)
 | 
|  |    115 |   case NOT(r) =>  NOT(der(c, r))
 | 
|  |    116 | }
 | 
|  |    117 | 
 | 
|  |    118 | 
 | 
|  |    119 | // extracts a string from a value
 | 
|  |    120 | def flatten(v: Val) : String = v match {
 | 
|  |    121 |   case Empty => ""
 | 
|  |    122 |   case Chr(c) => c.toString
 | 
|  |    123 |   case Left(v) => flatten(v)
 | 
|  |    124 |   case Right(v) => flatten(v)
 | 
|  |    125 |   case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
 | 
|  |    126 |   case Stars(vs) => vs.map(flatten).mkString
 | 
|  |    127 |   case Ntime(vs) => vs.map(flatten).mkString
 | 
|  |    128 |   case Optionall(v) => flatten(v)
 | 
|  |    129 |   case Rec(_, v) => flatten(v)
 | 
|  |    130 | }
 | 
|  |    131 | 
 | 
|  |    132 | 
 | 
|  |    133 | // extracts an environment from a value;
 | 
|  |    134 | // used for tokenising a string
 | 
|  |    135 | def env(v: Val) : List[(String, String)] = v match {
 | 
|  |    136 |   case Empty => Nil
 | 
|  |    137 |   case Chr(c) => Nil
 | 
|  |    138 |   case Left(v) => env(v)
 | 
|  |    139 |   case Right(v) => env(v)
 | 
|  |    140 |   case Sequ(v1, v2) => env(v1) ::: env(v2)
 | 
|  |    141 |   case Stars(vs) => vs.flatMap(env)
 | 
|  |    142 |   case Ntime(vs) => vs.flatMap(env)
 | 
|  |    143 |   case Rec(x, v) => (x, flatten(v))::env(v)
 | 
|  |    144 |   case Optionall(v) => env(v)
 | 
|  |    145 |   case Nots(s) => ("Negative", s) :: Nil
 | 
|  |    146 | }
 | 
|  |    147 | 
 | 
|  |    148 | 
 | 
|  |    149 | // The injection and mkeps part of the lexer
 | 
|  |    150 | //===========================================
 | 
|  |    151 | 
 | 
|  |    152 | def mkeps(r: Rexp) : Val = r match {
 | 
|  |    153 |   case ONE => Empty
 | 
|  |    154 |   case ALTS(r1, r2) => 
 | 
|  |    155 |     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
 | 
|  |    156 |   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
 | 
|  |    157 |   case STAR(r) => Stars(Nil)
 | 
|  |    158 |   case RECD(x, r) => Rec(x, mkeps(r))
 | 
|  |    159 |   case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
 | 
|  |    160 |   case OPTIONAL(r) => Optionall(Empty)
 | 
|  |    161 |   case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")  
 | 
|  |    162 |                          else Nots("")//Nots(s.reverse.toString)
 | 
|  |    163 | //   case NOT(ZERO) => Empty
 | 
|  |    164 | //   case NOT(CHAR(c)) => Empty
 | 
|  |    165 | //   case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
 | 
|  |    166 | //   case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
 | 
|  |    167 | //   case NOT(STAR(r)) => Stars(Nil) 
 | 
|  |    168 | 
 | 
|  |    169 | }
 | 
|  |    170 | 
 | 
|  |    171 | def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
 | 
|  |    172 |   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
 | 
|  |    173 |   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
 | 
|  |    174 |   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
 | 
|  |    175 |   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
 | 
|  |    176 |   case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
 | 
|  |    177 |   case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
 | 
|  |    178 |   case (CHAR(d), Empty) => Chr(c) 
 | 
|  |    179 |   case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
 | 
|  |    180 |   case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
 | 
|  |    181 |   case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
 | 
|  |    182 |   case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
 | 
|  |    183 |   case (ANYCHAR, Empty) => Chr(c)
 | 
|  |    184 | }
 | 
|  |    185 | 
 | 
|  |    186 | // some "rectification" functions for simplification
 | 
|  |    187 | def F_ID(v: Val): Val = v
 | 
|  |    188 | def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
 | 
|  |    189 | def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
 | 
|  |    190 | def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
|  |    191 |   case Right(v) => Right(f2(v))
 | 
|  |    192 |   case Left(v) => Left(f1(v))
 | 
|  |    193 | }
 | 
|  |    194 | def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
|  |    195 |   case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
 | 
|  |    196 | }
 | 
|  |    197 | def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
 | 
|  |    198 |   (v:Val) => Sequ(f1(Empty), f2(v))
 | 
|  |    199 | def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
 | 
|  |    200 |   (v:Val) => Sequ(f1(v), f2(Empty))
 | 
|  |    201 | 
 | 
|  |    202 | def F_ERROR(v: Val): Val = throw new Exception("error")
 | 
|  |    203 | 
 | 
|  |    204 | // simplification
 | 
|  |    205 | def simp(r: Rexp): (Rexp, Val => Val) = r match {
 | 
|  |    206 |   case ALTS(r1, r2) => {
 | 
|  |    207 |     val (r1s, f1s) = simp(r1)
 | 
|  |    208 |     val (r2s, f2s) = simp(r2)
 | 
|  |    209 |     (r1s, r2s) match {
 | 
|  |    210 |       case (ZERO, _) => (r2s, F_RIGHT(f2s))
 | 
|  |    211 |       case (_, ZERO) => (r1s, F_LEFT(f1s))
 | 
|  |    212 |       case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
 | 
|  |    213 |                 else (ALTS (r1s, r2s), F_ALT(f1s, f2s)) 
 | 
|  |    214 |     }
 | 
|  |    215 |   }
 | 
|  |    216 |   case SEQ(r1, r2) => {
 | 
|  |    217 |     val (r1s, f1s) = simp(r1)
 | 
|  |    218 |     val (r2s, f2s) = simp(r2)
 | 
|  |    219 |     (r1s, r2s) match {
 | 
|  |    220 |       case (ZERO, _) => (ZERO, F_ERROR)
 | 
|  |    221 |       case (_, ZERO) => (ZERO, F_ERROR)
 | 
|  |    222 |       case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
 | 
|  |    223 |       case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
 | 
|  |    224 |       case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
 | 
|  |    225 |     }
 | 
|  |    226 |   }
 | 
|  |    227 |   case r => (r, F_ID)
 | 
|  |    228 | }
 | 
|  |    229 | 
 | 
|  |    230 | // lexing functions including simplification
 | 
|  |    231 | def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
 | 
|  |    232 |   case Nil => if (nullable(r)) mkeps(r) else 
 | 
|  |    233 |     { throw new Exception(s"lexing error $r not nullable") } 
 | 
|  |    234 |   case c::cs => {
 | 
|  |    235 |     val (r_simp, f_simp) = simp(der(c, r))
 | 
|  |    236 |     inj(r, c, f_simp(lex_simp(r_simp, cs)))
 | 
|  |    237 |   }
 | 
|  |    238 | }
 | 
|  |    239 | 
 | 
|  |    240 | def lexing_simp(r: Rexp, s: String) = 
 | 
|  |    241 |   env(lex_simp(r, s.toList))
 | 
|  |    242 | 
 | 
|  |    243 | // The Lexing Rules for the WHILE Language
 | 
|  |    244 | 
 | 
|  |    245 | def PLUS(r: Rexp) = r ~ r.%
 | 
|  |    246 | 
 | 
|  |    247 | def Range(s : List[Char]) : Rexp = s match {
 | 
|  |    248 |   case Nil => ZERO
 | 
|  |    249 |   case c::Nil => CHAR(c)
 | 
|  |    250 |   case c::s => ALTS(CHAR(c), Range(s))
 | 
|  |    251 | }
 | 
|  |    252 | def RANGE(s: String) = Range(s.toList)
 | 
|  |    253 | 
 | 
|  |    254 | val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
 | 
|  |    255 | val DIGIT = RANGE("0123456789")
 | 
|  |    256 | val ID = SYM ~ (SYM | DIGIT).% 
 | 
|  |    257 | val NUM = PLUS(DIGIT)
 | 
|  |    258 | val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" 
 | 
|  |    259 | val SEMI: Rexp = ";"
 | 
|  |    260 | val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
 | 
|  |    261 | val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
 | 
|  |    262 | val RPAREN: Rexp = "{"
 | 
|  |    263 | val LPAREN: Rexp = "}"
 | 
|  |    264 | val STRING: Rexp = "\"" ~ SYM.% ~ "\""
 | 
|  |    265 | 
 | 
|  |    266 | 
 | 
|  |    267 | //ab \ a --> 1b
 | 
|  |    268 | //
 | 
|  |    269 | val WHILE_REGS = (("k" $ KEYWORD) | 
 | 
|  |    270 |                   ("i" $ ID) | 
 | 
|  |    271 |                   ("o" $ OP) | 
 | 
|  |    272 |                   ("n" $ NUM) | 
 | 
|  |    273 |                   ("s" $ SEMI) | 
 | 
|  |    274 |                   ("str" $ STRING) |
 | 
|  |    275 |                   ("p" $ (LPAREN | RPAREN)) | 
 | 
|  |    276 |                   ("w" $ WHITESPACE)).%
 | 
|  |    277 | 
 | 
|  |    278 | val NREGS = NTIMES(5, OPTIONAL(SYM))
 | 
|  |    279 | val NREGS1 = ("test" $ NREGS)
 | 
|  |    280 | // Two Simple While Tests
 | 
|  |    281 | //========================
 | 
|  |    282 | val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))
 | 
|  |    283 | 
 | 
|  |    284 | 
 | 
|  |    285 |   // bnullable function: tests whether the aregular 
 | 
|  |    286 |   // expression can recognise the empty string
 | 
|  |    287 | def bnullable (r: ARexp) : Boolean = r match {
 | 
|  |    288 |     case AZERO => false
 | 
|  |    289 |     case AONE(_) => true
 | 
|  |    290 |     case ACHAR(_,_) => false
 | 
|  |    291 |     case AALTS(_, rs) => rs.exists(bnullable)
 | 
|  |    292 |     case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
 | 
|  |    293 |     case ASTAR(_, _) => true
 | 
|  |    294 |     case ANOT(_, rn) => !bnullable(rn)
 | 
|  |    295 |   }
 | 
|  |    296 | 
 | 
|  |    297 | def mkepsBC(r: ARexp) : Bits = r match {
 | 
|  |    298 |     case AONE(bs) => bs
 | 
|  |    299 |     case AALTS(bs, rs) => {
 | 
|  |    300 |       val n = rs.indexWhere(bnullable)
 | 
|  |    301 |       bs ++ mkepsBC(rs(n))
 | 
|  |    302 |     }
 | 
|  |    303 |     case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
 | 
|  |    304 |     case ASTAR(bs, r) => bs ++ List(Z)
 | 
|  |    305 |     case ANOT(bs, rn) => bs
 | 
|  |    306 |   }
 | 
|  |    307 | 
 | 
|  |    308 | 
 | 
|  |    309 | def bder(c: Char, r: ARexp) : ARexp = r match {
 | 
|  |    310 |     case AZERO => AZERO
 | 
|  |    311 |     case AONE(_) => AZERO
 | 
|  |    312 |     case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
 | 
|  |    313 |     case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
 | 
|  |    314 |     case ASEQ(bs, r1, r2) => 
 | 
|  |    315 |       if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
 | 
|  |    316 |       else ASEQ(bs, bder(c, r1), r2)
 | 
|  |    317 |     case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
 | 
|  |    318 |     case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
 | 
|  |    319 |     case AANYCHAR(bs) => AONE(bs)
 | 
|  |    320 |   } 
 | 
|  |    321 | 
 | 
|  |    322 | def fuse(bs: Bits, r: ARexp) : ARexp = r match {
 | 
|  |    323 |     case AZERO => AZERO
 | 
|  |    324 |     case AONE(cs) => AONE(bs ++ cs)
 | 
|  |    325 |     case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
 | 
|  |    326 |     case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
 | 
|  |    327 |     case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
 | 
|  |    328 |     case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
 | 
|  |    329 |     case ANOT(cs, r) => ANOT(bs ++ cs, r)
 | 
|  |    330 |   }
 | 
|  |    331 | 
 | 
|  |    332 | 
 | 
|  |    333 | def internalise(r: Rexp) : ARexp = r match {
 | 
|  |    334 |     case ZERO => AZERO
 | 
|  |    335 |     case ONE => AONE(Nil)
 | 
|  |    336 |     case CHAR(c) => ACHAR(Nil, c)
 | 
|  |    337 |     //case PRED(f) => APRED(Nil, f)
 | 
|  |    338 |     case ALTS(r1, r2) => 
 | 
|  |    339 |       AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
 | 
|  |    340 |     // case ALTS(r1::rs) => {
 | 
|  |    341 |     //   val AALTS(Nil, rs2) = internalise(ALTS(rs))
 | 
|  |    342 |     //   AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
 | 
|  |    343 |     // }
 | 
|  |    344 |     case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
 | 
|  |    345 |     case STAR(r) => ASTAR(Nil, internalise(r))
 | 
|  |    346 |     case RECD(x, r) => internalise(r)
 | 
|  |    347 |     case NOT(r) => ANOT(Nil, internalise(r))
 | 
|  |    348 |     case ANYCHAR => AANYCHAR(Nil)
 | 
|  |    349 |   }
 | 
|  |    350 | 
 | 
|  |    351 | 
 | 
|  |    352 | def bsimp(r: ARexp): ARexp = 
 | 
|  |    353 |   {
 | 
|  |    354 |     r match {
 | 
|  |    355 |       case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
 | 
|  |    356 |           case (AZERO, _) => AZERO
 | 
|  |    357 |           case (_, AZERO) => AZERO
 | 
|  |    358 |           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
 | 
|  |    359 |           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
 | 
|  |    360 |       }
 | 
|  |    361 |       case AALTS(bs1, rs) => {
 | 
|  |    362 |             val rs_simp = rs.map(bsimp(_))
 | 
|  |    363 |             val flat_res = flats(rs_simp)
 | 
| 409 |    364 |             val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
 | 
| 403 |    365 |             dist_res match {
 | 
|  |    366 |               case Nil => AZERO
 | 
|  |    367 |               case s :: Nil => fuse(bs1, s)
 | 
|  |    368 |               case rs => AALTS(bs1, rs)  
 | 
|  |    369 |             }
 | 
|  |    370 |           
 | 
|  |    371 |       }
 | 
|  |    372 |       case r => r
 | 
|  |    373 |     }
 | 
|  |    374 |   }
 | 
| 409 |    375 |   def strongBsimp(r: ARexp): ARexp =
 | 
|  |    376 |   {
 | 
|  |    377 |     r match {
 | 
|  |    378 |       case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
 | 
|  |    379 |           case (AZERO, _) => AZERO
 | 
|  |    380 |           case (_, AZERO) => AZERO
 | 
|  |    381 |           case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
 | 
|  |    382 |           case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
 | 
|  |    383 |       }
 | 
|  |    384 |       case AALTS(bs1, rs) => {
 | 
|  |    385 |             val rs_simp = rs.map(strongBsimp(_))
 | 
|  |    386 |             val flat_res = flats(rs_simp)
 | 
|  |    387 |             val dist_res = strongDB(flat_res)//distinctBy(flat_res, erase)
 | 
|  |    388 |             dist_res match {
 | 
|  |    389 |               case Nil => AZERO
 | 
|  |    390 |               case s :: Nil => fuse(bs1, s)
 | 
|  |    391 |               case rs => AALTS(bs1, rs)  
 | 
|  |    392 |             }
 | 
|  |    393 |           
 | 
|  |    394 |       }
 | 
|  |    395 |       case r => r
 | 
|  |    396 |     }
 | 
|  |    397 |   }
 | 
|  |    398 | 
 | 
| 403 |    399 |   def bders (s: List[Char], r: ARexp) : ARexp = s match {
 | 
|  |    400 |     case Nil => r
 | 
|  |    401 |     case c::s => bders(s, bder(c, r))
 | 
|  |    402 |   }
 | 
|  |    403 | 
 | 
|  |    404 |   def flats(rs: List[ARexp]): List[ARexp] = rs match {
 | 
|  |    405 |       case Nil => Nil
 | 
|  |    406 |       case AZERO :: rs1 => flats(rs1)
 | 
|  |    407 |       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
 | 
|  |    408 |       case r1 :: rs2 => r1 :: flats(rs2)
 | 
|  |    409 |     }
 | 
|  |    410 | 
 | 
|  |    411 |   def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
 | 
|  |    412 |     case Nil => Nil
 | 
|  |    413 |     case (x::xs) => {
 | 
|  |    414 |       val res = f(x)
 | 
|  |    415 |       if (acc.contains(res)) distinctBy(xs, f, acc)  
 | 
|  |    416 |       else x::distinctBy(xs, f, res::acc)
 | 
|  |    417 |     }
 | 
|  |    418 |   } 
 | 
|  |    419 | 
 | 
|  |    420 |   def prettyRexp(r: Rexp) : String = r match {
 | 
|  |    421 |       case STAR(r0) => s"${prettyRexp(r0)}*"
 | 
|  |    422 |       case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
 | 
|  |    423 |       case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
 | 
|  |    424 |       case CHAR(c) => c.toString
 | 
|  |    425 |       case ANYCHAR => "."
 | 
|  |    426 |     //   case NOT(r0) => s
 | 
|  |    427 |   }
 | 
|  |    428 | 
 | 
|  |    429 |   def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
 | 
|  |    430 |     case (ONE, bs) => (Empty, bs)
 | 
|  |    431 |     case (CHAR(f), bs) => (Chr(f), bs)
 | 
|  |    432 |     case (ALTS(r1, r2), Z::bs1) => {
 | 
|  |    433 |         val (v, bs2) = decode_aux(r1, bs1)
 | 
|  |    434 |         (Left(v), bs2)
 | 
|  |    435 |     }
 | 
|  |    436 |     case (ALTS(r1, r2), S::bs1) => {
 | 
|  |    437 |         val (v, bs2) = decode_aux(r2, bs1)
 | 
|  |    438 |         (Right(v), bs2)
 | 
|  |    439 |     }
 | 
|  |    440 |     case (SEQ(r1, r2), bs) => {
 | 
|  |    441 |       val (v1, bs1) = decode_aux(r1, bs)
 | 
|  |    442 |       val (v2, bs2) = decode_aux(r2, bs1)
 | 
|  |    443 |       (Sequ(v1, v2), bs2)
 | 
|  |    444 |     }
 | 
|  |    445 |     case (STAR(r1), S::bs) => {
 | 
|  |    446 |       val (v, bs1) = decode_aux(r1, bs)
 | 
|  |    447 |       //println(v)
 | 
|  |    448 |       val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
 | 
|  |    449 |       (Stars(v::vs), bs2)
 | 
|  |    450 |     }
 | 
|  |    451 |     case (STAR(_), Z::bs) => (Stars(Nil), bs)
 | 
|  |    452 |     case (RECD(x, r1), bs) => {
 | 
|  |    453 |       val (v, bs1) = decode_aux(r1, bs)
 | 
|  |    454 |       (Rec(x, v), bs1)
 | 
|  |    455 |     }
 | 
|  |    456 |     case (NOT(r), bs) => (Nots(prettyRexp(r)), bs)
 | 
|  |    457 |   }
 | 
|  |    458 | 
 | 
|  |    459 |   def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
 | 
|  |    460 |     case (v, Nil) => v
 | 
|  |    461 |     case _ => throw new Exception("Not decodable")
 | 
|  |    462 |   }
 | 
|  |    463 | 
 | 
|  |    464 | 
 | 
|  |    465 | 
 | 
|  |    466 | def blexSimp(r: Rexp, s: String) : List[Bit] = {
 | 
|  |    467 |     blex_simp(internalise(r), s.toList)
 | 
|  |    468 | }
 | 
|  |    469 | 
 | 
|  |    470 | def blexing_simp(r: Rexp, s: String) : Val = {
 | 
|  |    471 |     val bit_code = blex_simp(internalise(r), s.toList)
 | 
|  |    472 |     decode(r, bit_code)
 | 
|  |    473 |   }
 | 
|  |    474 | 
 | 
|  |    475 | 
 | 
|  |    476 | 
 | 
|  |    477 |   def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
 | 
|  |    478 |     case Nil => r
 | 
|  |    479 |     case c::s => bders_simp(s, bsimp(bder(c, r)))
 | 
|  |    480 |   }
 | 
|  |    481 |   
 | 
|  |    482 |   def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
 | 
|  |    483 | 
 | 
|  |    484 | 
 | 
|  |    485 |   def erase(r:ARexp): Rexp = r match{
 | 
|  |    486 |     case AZERO => ZERO
 | 
|  |    487 |     case AONE(_) => ONE
 | 
|  |    488 |     case ACHAR(bs, c) => CHAR(c)
 | 
|  |    489 |     case AALTS(bs, Nil) => ZERO
 | 
|  |    490 |     case AALTS(bs, a::Nil) => erase(a)
 | 
|  |    491 |     case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
 | 
|  |    492 |     case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
 | 
|  |    493 |     case ASTAR(cs, r)=> STAR(erase(r))
 | 
|  |    494 |     case ANOT(bs, r) => NOT(erase(r))
 | 
|  |    495 |     case AANYCHAR(bs) => ANYCHAR
 | 
|  |    496 |   }
 | 
|  |    497 | 
 | 
|  |    498 | def breakHead(r: ARexp) : List[ARexp] = r match {
 | 
|  |    499 |     case AALTS(bs, rs) => rs
 | 
|  |    500 |     case r => r::Nil
 | 
|  |    501 | }
 | 
|  |    502 | 
 | 
|  |    503 | def distinctByWithAcc[B, C](xs: List[B], f: B => C, 
 | 
|  |    504 |     acc: List[C] = Nil, accB: List[B] = Nil): (List[B], List[C]) = xs match {
 | 
|  |    505 |     case Nil => (accB.reverse, acc)
 | 
|  |    506 |     case (x::xs) => {
 | 
|  |    507 |       val res = f(x)
 | 
|  |    508 |       if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)  
 | 
|  |    509 |       else distinctByWithAcc(xs, f, res::acc, x::accB)
 | 
|  |    510 |     }
 | 
|  |    511 |   } 
 | 
|  |    512 | 
 | 
|  |    513 | 
 | 
| 409 |    514 |   def strongDB(xs: List[ARexp], 
 | 
| 403 |    515 |                        acc1: List[Rexp] = Nil, 
 | 
|  |    516 |                        acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
 | 
|  |    517 |     case Nil => Nil
 | 
|  |    518 |     case (x::xs) => 
 | 
|  |    519 |         if(acc1.contains(erase(x)))
 | 
| 409 |    520 |             strongDB(xs, acc1, acc2)
 | 
| 403 |    521 |         else{
 | 
|  |    522 |             x match {
 | 
|  |    523 |                 case ASTAR(bs0, r0) => 
 | 
|  |    524 |                     val headList : List[ARexp] = List[ARexp](AONE(Nil))
 | 
|  |    525 |                     val i = acc2.indexWhere(
 | 
|  |    526 |                         r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
 | 
|  |    527 |                     )
 | 
|  |    528 |                     if(i == -1){ 
 | 
| 409 |    529 |                         x::strongDB(
 | 
| 403 |    530 |                             xs, erase(x)::acc1, (ONE::Nil, erase(r0))::acc2
 | 
|  |    531 |                         )
 | 
|  |    532 |                     }
 | 
|  |    533 |                     else{
 | 
|  |    534 |                         val headListAlready = acc2(i)
 | 
|  |    535 |                         val (newHeads, oldHeadsUpdated) = 
 | 
|  |    536 |                                 distinctByWithAcc(headList, erase, headListAlready._1)
 | 
|  |    537 |                         newHeads match{
 | 
|  |    538 |                             case newHead::Nil =>
 | 
|  |    539 |                                 ASTAR(bs0, r0) :: 
 | 
| 409 |    540 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    541 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
 | 
|  |    542 |                             case Nil =>
 | 
| 409 |    543 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    544 |                                 acc2)
 | 
|  |    545 |                         }
 | 
|  |    546 |                     }                
 | 
|  |    547 |                 case ASEQ(bs, r1, ASTAR(bs0, r0)) => 
 | 
|  |    548 |                     val headList = breakHead(r1)
 | 
|  |    549 |                     val i = acc2.indexWhere(
 | 
|  |    550 |                         r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) } 
 | 
|  |    551 |                     )
 | 
|  |    552 |                     if(i == -1){ 
 | 
| 409 |    553 |                         x::strongDB(
 | 
| 403 |    554 |                             xs, erase(x)::acc1, (headList.map(erase(_)), erase(r0))::acc2
 | 
|  |    555 |                         )
 | 
|  |    556 |                     }
 | 
|  |    557 |                     else{
 | 
|  |    558 |                         val headListAlready = acc2(i)
 | 
|  |    559 |                         val (newHeads, oldHeadsUpdated) = 
 | 
|  |    560 |                                 distinctByWithAcc(headList, erase, headListAlready._1)
 | 
|  |    561 |                         newHeads match{
 | 
|  |    562 |                             case newHead::Nil =>
 | 
|  |    563 |                                 ASEQ(bs, newHead, ASTAR(bs0, r0)) :: 
 | 
| 409 |    564 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    565 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
 | 
|  |    566 |                             case Nil =>
 | 
| 409 |    567 |                                 strongDB(xs, erase(x)::acc1, 
 | 
| 403 |    568 |                                 acc2)
 | 
|  |    569 |                             case hds => val AALTS(bsp, rsp) = r1
 | 
|  |    570 |                                 ASEQ(bs, AALTS(bsp, hds), ASTAR(bs0, r0)) ::
 | 
| 409 |    571 |                                 strongDB(xs, erase(x)::acc1,
 | 
| 403 |    572 |                                 acc2.updated(i, (oldHeadsUpdated, headListAlready._2)))
 | 
|  |    573 |                         }
 | 
|  |    574 |                     }
 | 
| 409 |    575 |                 case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)    
 | 
| 403 |    576 |             }
 | 
|  |    577 |                 
 | 
|  |    578 |         }
 | 
|  |    579 |     
 | 
|  |    580 | }
 | 
|  |    581 | 
 | 
|  |    582 | 
 | 
|  |    583 | def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
 | 
|  |    584 |     case Nil => {
 | 
|  |    585 |       if (bnullable(r)) {
 | 
|  |    586 |         //println(asize(r))
 | 
|  |    587 |         val bits = mkepsBC(r)
 | 
|  |    588 |         bits
 | 
|  |    589 |       }
 | 
|  |    590 |     else throw new Exception("Not matched")
 | 
|  |    591 |     }
 | 
|  |    592 |     case c::cs => {
 | 
|  |    593 |       val der_res = bder(c,r)
 | 
|  |    594 |       val simp_res = bsimp(der_res)  
 | 
|  |    595 |       blex_simp(simp_res, cs)      
 | 
|  |    596 |     }
 | 
|  |    597 |   }
 | 
|  |    598 |   def size(r: Rexp) : Int = r match {
 | 
|  |    599 |     case ZERO => 1
 | 
|  |    600 |     case ONE => 1
 | 
|  |    601 |     case CHAR(_) => 1
 | 
|  |    602 |     case ANYCHAR => 1
 | 
|  |    603 |     case NOT(r0) => 1 + size(r0)
 | 
|  |    604 |     case SEQ(r1, r2) => 1 + size(r1) + size(r2)
 | 
|  |    605 |     case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
 | 
|  |    606 |     case STAR(r) => 1 + size(r)
 | 
|  |    607 |   }
 | 
|  |    608 | 
 | 
|  |    609 |   def asize(a: ARexp) = size(erase(a))
 | 
|  |    610 | 
 | 
|  |    611 | 
 | 
|  |    612 | // @arg(doc = "small tests")
 | 
| 409 |    613 | val STARREG = ((STAR("a") | STAR("aa") ).%).%
 | 
| 403 |    614 | 
 | 
|  |    615 | @main
 | 
|  |    616 | def small() = {
 | 
|  |    617 | 
 | 
| 409 |    618 |   val prog0 = """aaaaaaaaa"""
 | 
| 403 |    619 |   println(s"test: $prog0")
 | 
|  |    620 | //   println(lexing_simp(NOTREG, prog0))
 | 
|  |    621 | //   val v = lex_simp(NOTREG, prog0.toList)
 | 
|  |    622 | //   println(v)
 | 
|  |    623 | 
 | 
|  |    624 | //   val d =  (lex_simp(NOTREG, prog0.toList))
 | 
|  |    625 | //   println(d)
 | 
|  |    626 | 
 | 
|  |    627 |   val bd = bdersSimp(prog0, STARREG)
 | 
|  |    628 |     println(erase(bd))
 | 
|  |    629 |     println(asize(bd))
 | 
|  |    630 | 
 | 
|  |    631 |     val vres = blexing_simp( STARREG, prog0)
 | 
|  |    632 |     println(vres)
 | 
|  |    633 | //   println(vs.length)
 | 
|  |    634 | //   println(vs)
 | 
|  |    635 |    
 | 
|  |    636 | 
 | 
|  |    637 |   // val prog1 = """read  n; write n"""  
 | 
|  |    638 |   // println(s"test: $prog1")
 | 
|  |    639 |   // println(lexing_simp(WHILE_REGS, prog1))
 | 
|  |    640 | }
 | 
|  |    641 | 
 | 
|  |    642 | // // Bigger Tests
 | 
|  |    643 | // //==============
 | 
|  |    644 | 
 | 
|  |    645 | // // escapes strings and prints them out as "", "\n" and so on
 | 
|  |    646 | // def esc(raw: String): String = {
 | 
|  |    647 | //   import scala.reflect.runtime.universe._
 | 
|  |    648 | //   Literal(Constant(raw)).toString
 | 
|  |    649 | // }
 | 
|  |    650 | 
 | 
|  |    651 | // def escape(tks: List[(String, String)]) =
 | 
|  |    652 | //   tks.map{ case (s1, s2) => (s1, esc(s2))}
 | 
|  |    653 | 
 | 
|  |    654 | 
 | 
|  |    655 | // val prog2 = """
 | 
|  |    656 | // write "Fib";
 | 
|  |    657 | // read n;
 | 
|  |    658 | // minus1 := 0;
 | 
|  |    659 | // minus2 := 1;
 | 
|  |    660 | // while n > 0 do {
 | 
|  |    661 | //   temp := minus2;
 | 
|  |    662 | //   minus2 := minus1 + minus2;
 | 
|  |    663 | //   minus1 := temp;
 | 
|  |    664 | //   n := n - 1
 | 
|  |    665 | // };
 | 
|  |    666 | // write "Result";
 | 
|  |    667 | // write minus2
 | 
|  |    668 | // """
 | 
|  |    669 | 
 | 
|  |    670 | // @arg(doc = "Fibonacci test")
 | 
|  |    671 | // @main
 | 
|  |    672 | // def fib() = {
 | 
|  |    673 | //   println("lexing fib program")
 | 
|  |    674 | //   println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
 | 
|  |    675 | // }
 | 
|  |    676 | 
 | 
|  |    677 | 
 | 
|  |    678 | // val prog3 = """
 | 
|  |    679 | // start := 1000;
 | 
|  |    680 | // x := start;
 | 
|  |    681 | // y := start;
 | 
|  |    682 | // z := start;
 | 
|  |    683 | // while 0 < x do {
 | 
|  |    684 | //  while 0 < y do {
 | 
|  |    685 | //   while 0 < z do {
 | 
|  |    686 | //     z := z - 1
 | 
|  |    687 | //   };
 | 
|  |    688 | //   z := start;
 | 
|  |    689 | //   y := y - 1
 | 
|  |    690 | //  };     
 | 
|  |    691 | //  y := start;
 | 
|  |    692 | //  x := x - 1
 | 
|  |    693 | // }
 | 
|  |    694 | // """
 | 
|  |    695 | 
 | 
|  |    696 | // @arg(doc = "Loops test")
 | 
|  |    697 | // @main
 | 
|  |    698 | // def loops() = {
 | 
|  |    699 | //   println("lexing Loops")
 | 
|  |    700 | //   println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
 | 
|  |    701 | // }
 | 
|  |    702 | 
 | 
|  |    703 | // @arg(doc = "Email Test")
 | 
|  |    704 | // @main
 | 
|  |    705 | // def email() = {
 | 
|  |    706 | //   val lower = "abcdefghijklmnopqrstuvwxyz"
 | 
|  |    707 | 
 | 
|  |    708 | //   val NAME = RECD("name", PLUS(RANGE(lower ++ "_.-")))
 | 
|  |    709 | //   val DOMAIN = RECD("domain", PLUS(RANGE(lower ++ "-")))
 | 
|  |    710 | //   val RE = RANGE(lower ++ ".")
 | 
|  |    711 | //   val TOPLEVEL = RECD("top", (RE ~ RE) |
 | 
|  |    712 | //                              (RE ~ RE ~ RE) | 
 | 
|  |    713 | //                              (RE ~ RE ~ RE ~ RE) | 
 | 
|  |    714 | //                              (RE ~ RE ~ RE ~ RE ~ RE) |
 | 
|  |    715 | //                              (RE ~ RE ~ RE ~ RE ~ RE ~ RE))
 | 
|  |    716 | 
 | 
|  |    717 | //   val EMAIL = NAME ~ "@" ~ DOMAIN ~ "." ~ TOPLEVEL
 | 
|  |    718 | 
 | 
|  |    719 | //   println(lexing_simp(EMAIL, "christian.urban@kcl.ac.uk"))
 | 
|  |    720 | // }
 | 
|  |    721 | 
 | 
|  |    722 | 
 | 
|  |    723 | // @arg(doc = "All tests.")
 | 
|  |    724 | // @main
 | 
|  |    725 | // def all() = { small(); fib() ; loops() ; email() } 
 | 
|  |    726 | 
 | 
|  |    727 | 
 | 
|  |    728 | 
 | 
|  |    729 | 
 | 
|  |    730 | // runs with amm2 and amm3
 | 
|  |    731 | 
 | 
|  |    732 | 
 | 
|  |    733 | 
 |