403
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
+ − 13
+ − 14
// regular expressions including records
+ − 15
abstract class Rexp
+ − 16
case object ZERO extends Rexp
+ − 17
case object ONE extends Rexp
+ − 18
case object ANYCHAR extends Rexp
+ − 19
case class CHAR(c: Char) extends Rexp
+ − 20
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 21
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class STAR(r: Rexp) extends Rexp
+ − 23
case class RECD(x: String, r: Rexp) extends Rexp
+ − 24
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 25
case class OPTIONAL(r: Rexp) extends Rexp
+ − 26
case class NOT(r: Rexp) extends Rexp
+ − 27
// records for extracting strings or tokens
+ − 28
+ − 29
// values
+ − 30
abstract class Val
+ − 31
case object Empty extends Val
+ − 32
case class Chr(c: Char) extends Val
+ − 33
case class Sequ(v1: Val, v2: Val) extends Val
+ − 34
case class Left(v: Val) extends Val
+ − 35
case class Right(v: Val) extends Val
+ − 36
case class Stars(vs: List[Val]) extends Val
+ − 37
case class Rec(x: String, v: Val) extends Val
+ − 38
case class Ntime(vs: List[Val]) extends Val
+ − 39
case class Optionall(v: Val) extends Val
+ − 40
case class Nots(s: String) extends Val
+ − 41
+ − 42
+ − 43
+ − 44
abstract class Bit
+ − 45
case object Z extends Bit
+ − 46
case object S extends Bit
+ − 47
+ − 48
+ − 49
type Bits = List[Bit]
+ − 50
+ − 51
abstract class ARexp
+ − 52
case object AZERO extends ARexp
+ − 53
case class AONE(bs: Bits) extends ARexp
+ − 54
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 55
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 56
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 57
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 58
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 59
case class AANYCHAR(bs: Bits) extends ARexp
+ − 60
+ − 61
+ − 62
+ − 63
// some convenience for typing in regular expressions
+ − 64
+ − 65
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 66
case Nil => ONE
+ − 67
case c::Nil => CHAR(c)
+ − 68
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 69
}
+ − 70
implicit def string2rexp(s : String) : Rexp =
+ − 71
charlist2rexp(s.toList)
+ − 72
+ − 73
implicit def RexpOps(r: Rexp) = new {
+ − 74
def | (s: Rexp) = ALTS(r, s)
+ − 75
def % = STAR(r)
+ − 76
def ~ (s: Rexp) = SEQ(r, s)
+ − 77
}
+ − 78
+ − 79
implicit def stringOps(s: String) = new {
+ − 80
def | (r: Rexp) = ALTS(s, r)
+ − 81
def | (r: String) = ALTS(s, r)
+ − 82
def % = STAR(s)
+ − 83
def ~ (r: Rexp) = SEQ(s, r)
+ − 84
def ~ (r: String) = SEQ(s, r)
+ − 85
def $ (r: Rexp) = RECD(s, r)
+ − 86
}
+ − 87
+ − 88
def nullable(r: Rexp) : Boolean = r match {
+ − 89
case ZERO => false
+ − 90
case ONE => true
+ − 91
case CHAR(_) => false
+ − 92
case ANYCHAR => false
+ − 93
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 94
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 95
case STAR(_) => true
+ − 96
case RECD(_, r1) => nullable(r1)
+ − 97
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 98
case OPTIONAL(r) => true
+ − 99
case NOT(r) => !nullable(r)
+ − 100
}
+ − 101
+ − 102
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 103
case ZERO => ZERO
+ − 104
case ONE => ZERO
+ − 105
case CHAR(d) => if (c == d) ONE else ZERO
+ − 106
case ANYCHAR => ONE
+ − 107
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 108
case SEQ(r1, r2) =>
+ − 109
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 110
else SEQ(der(c, r1), r2)
+ − 111
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 112
case RECD(_, r1) => der(c, r1)
+ − 113
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 114
case OPTIONAL(r) => der(c, r)
+ − 115
case NOT(r) => NOT(der(c, r))
+ − 116
}
+ − 117
+ − 118
+ − 119
// extracts a string from a value
+ − 120
def flatten(v: Val) : String = v match {
+ − 121
case Empty => ""
+ − 122
case Chr(c) => c.toString
+ − 123
case Left(v) => flatten(v)
+ − 124
case Right(v) => flatten(v)
+ − 125
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 126
case Stars(vs) => vs.map(flatten).mkString
+ − 127
case Ntime(vs) => vs.map(flatten).mkString
+ − 128
case Optionall(v) => flatten(v)
+ − 129
case Rec(_, v) => flatten(v)
+ − 130
}
+ − 131
+ − 132
+ − 133
// extracts an environment from a value;
+ − 134
// used for tokenising a string
+ − 135
def env(v: Val) : List[(String, String)] = v match {
+ − 136
case Empty => Nil
+ − 137
case Chr(c) => Nil
+ − 138
case Left(v) => env(v)
+ − 139
case Right(v) => env(v)
+ − 140
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 141
case Stars(vs) => vs.flatMap(env)
+ − 142
case Ntime(vs) => vs.flatMap(env)
+ − 143
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 144
case Optionall(v) => env(v)
+ − 145
case Nots(s) => ("Negative", s) :: Nil
+ − 146
}
+ − 147
+ − 148
+ − 149
// The injection and mkeps part of the lexer
+ − 150
//===========================================
+ − 151
+ − 152
def mkeps(r: Rexp) : Val = r match {
+ − 153
case ONE => Empty
+ − 154
case ALTS(r1, r2) =>
+ − 155
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 156
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 157
case STAR(r) => Stars(Nil)
+ − 158
case RECD(x, r) => Rec(x, mkeps(r))
+ − 159
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 160
case OPTIONAL(r) => Optionall(Empty)
+ − 161
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 162
else Nots("")//Nots(s.reverse.toString)
+ − 163
// case NOT(ZERO) => Empty
+ − 164
// case NOT(CHAR(c)) => Empty
+ − 165
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 166
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 167
// case NOT(STAR(r)) => Stars(Nil)
+ − 168
+ − 169
}
+ − 170
+ − 171
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 172
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 173
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 174
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 175
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 176
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 177
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 178
case (CHAR(d), Empty) => Chr(c)
+ − 179
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 180
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 181
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 182
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 183
case (ANYCHAR, Empty) => Chr(c)
+ − 184
}
+ − 185
+ − 186
// some "rectification" functions for simplification
+ − 187
def F_ID(v: Val): Val = v
+ − 188
def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+ − 189
def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+ − 190
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 191
case Right(v) => Right(f2(v))
+ − 192
case Left(v) => Left(f1(v))
+ − 193
}
+ − 194
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 195
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+ − 196
}
+ − 197
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+ − 198
(v:Val) => Sequ(f1(Empty), f2(v))
+ − 199
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+ − 200
(v:Val) => Sequ(f1(v), f2(Empty))
+ − 201
+ − 202
def F_ERROR(v: Val): Val = throw new Exception("error")
+ − 203
+ − 204
// simplification
+ − 205
def simp(r: Rexp): (Rexp, Val => Val) = r match {
+ − 206
case ALTS(r1, r2) => {
+ − 207
val (r1s, f1s) = simp(r1)
+ − 208
val (r2s, f2s) = simp(r2)
+ − 209
(r1s, r2s) match {
+ − 210
case (ZERO, _) => (r2s, F_RIGHT(f2s))
+ − 211
case (_, ZERO) => (r1s, F_LEFT(f1s))
+ − 212
case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+ − 213
else (ALTS (r1s, r2s), F_ALT(f1s, f2s))
+ − 214
}
+ − 215
}
+ − 216
case SEQ(r1, r2) => {
+ − 217
val (r1s, f1s) = simp(r1)
+ − 218
val (r2s, f2s) = simp(r2)
+ − 219
(r1s, r2s) match {
+ − 220
case (ZERO, _) => (ZERO, F_ERROR)
+ − 221
case (_, ZERO) => (ZERO, F_ERROR)
+ − 222
case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+ − 223
case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+ − 224
case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+ − 225
}
+ − 226
}
+ − 227
case r => (r, F_ID)
+ − 228
}
+ − 229
+ − 230
// lexing functions including simplification
+ − 231
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+ − 232
case Nil => if (nullable(r)) mkeps(r) else
+ − 233
{ throw new Exception(s"lexing error $r not nullable") }
+ − 234
case c::cs => {
+ − 235
val (r_simp, f_simp) = simp(der(c, r))
+ − 236
inj(r, c, f_simp(lex_simp(r_simp, cs)))
+ − 237
}
+ − 238
}
+ − 239
+ − 240
def lexing_simp(r: Rexp, s: String) =
+ − 241
env(lex_simp(r, s.toList))
+ − 242
+ − 243
// The Lexing Rules for the WHILE Language
+ − 244
+ − 245
def PLUS(r: Rexp) = r ~ r.%
+ − 246
+ − 247
def Range(s : List[Char]) : Rexp = s match {
+ − 248
case Nil => ZERO
+ − 249
case c::Nil => CHAR(c)
+ − 250
case c::s => ALTS(CHAR(c), Range(s))
+ − 251
}
+ − 252
def RANGE(s: String) = Range(s.toList)
+ − 253
+ − 254
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+ − 255
val DIGIT = RANGE("0123456789")
+ − 256
val ID = SYM ~ (SYM | DIGIT).%
+ − 257
val NUM = PLUS(DIGIT)
+ − 258
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+ − 259
val SEMI: Rexp = ";"
+ − 260
val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+ − 261
val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
+ − 262
val RPAREN: Rexp = "{"
+ − 263
val LPAREN: Rexp = "}"
+ − 264
val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+ − 265
+ − 266
+ − 267
//ab \ a --> 1b
+ − 268
//
+ − 269
val WHILE_REGS = (("k" $ KEYWORD) |
+ − 270
("i" $ ID) |
+ − 271
("o" $ OP) |
+ − 272
("n" $ NUM) |
+ − 273
("s" $ SEMI) |
+ − 274
("str" $ STRING) |
+ − 275
("p" $ (LPAREN | RPAREN)) |
+ − 276
("w" $ WHITESPACE)).%
+ − 277
+ − 278
val NREGS = NTIMES(5, OPTIONAL(SYM))
+ − 279
val NREGS1 = ("test" $ NREGS)
+ − 280
// Two Simple While Tests
+ − 281
//========================
+ − 282
val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))
+ − 283
+ − 284
+ − 285
// bnullable function: tests whether the aregular
+ − 286
// expression can recognise the empty string
+ − 287
def bnullable (r: ARexp) : Boolean = r match {
+ − 288
case AZERO => false
+ − 289
case AONE(_) => true
+ − 290
case ACHAR(_,_) => false
+ − 291
case AALTS(_, rs) => rs.exists(bnullable)
+ − 292
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 293
case ASTAR(_, _) => true
+ − 294
case ANOT(_, rn) => !bnullable(rn)
+ − 295
}
+ − 296
+ − 297
def mkepsBC(r: ARexp) : Bits = r match {
+ − 298
case AONE(bs) => bs
+ − 299
case AALTS(bs, rs) => {
+ − 300
val n = rs.indexWhere(bnullable)
+ − 301
bs ++ mkepsBC(rs(n))
+ − 302
}
+ − 303
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 304
case ASTAR(bs, r) => bs ++ List(Z)
+ − 305
case ANOT(bs, rn) => bs
+ − 306
}
+ − 307
+ − 308
+ − 309
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 310
case AZERO => AZERO
+ − 311
case AONE(_) => AZERO
+ − 312
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 313
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 314
case ASEQ(bs, r1, r2) =>
+ − 315
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 316
else ASEQ(bs, bder(c, r1), r2)
+ − 317
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 318
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 319
case AANYCHAR(bs) => AONE(bs)
+ − 320
}
+ − 321
+ − 322
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 323
case AZERO => AZERO
+ − 324
case AONE(cs) => AONE(bs ++ cs)
+ − 325
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 326
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 327
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 328
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 329
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 330
}
+ − 331
+ − 332
+ − 333
def internalise(r: Rexp) : ARexp = r match {
+ − 334
case ZERO => AZERO
+ − 335
case ONE => AONE(Nil)
+ − 336
case CHAR(c) => ACHAR(Nil, c)
+ − 337
//case PRED(f) => APRED(Nil, f)
+ − 338
case ALTS(r1, r2) =>
+ − 339
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 340
// case ALTS(r1::rs) => {
+ − 341
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 342
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 343
// }
+ − 344
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 345
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 346
case RECD(x, r) => internalise(r)
+ − 347
case NOT(r) => ANOT(Nil, internalise(r))
+ − 348
case ANYCHAR => AANYCHAR(Nil)
+ − 349
}
+ − 350
+ − 351
+ − 352
def bsimp(r: ARexp): ARexp =
+ − 353
{
+ − 354
r match {
+ − 355
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 356
case (AZERO, _) => AZERO
+ − 357
case (_, AZERO) => AZERO
+ − 358
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 359
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 360
}
+ − 361
case AALTS(bs1, rs) => {
+ − 362
val rs_simp = rs.map(bsimp(_))
+ − 363
val flat_res = flats(rs_simp)
409
+ − 364
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
403
+ − 365
dist_res match {
+ − 366
case Nil => AZERO
+ − 367
case s :: Nil => fuse(bs1, s)
+ − 368
case rs => AALTS(bs1, rs)
+ − 369
}
+ − 370
+ − 371
}
+ − 372
case r => r
+ − 373
}
+ − 374
}
409
+ − 375
def strongBsimp(r: ARexp): ARexp =
+ − 376
{
+ − 377
r match {
+ − 378
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 379
case (AZERO, _) => AZERO
+ − 380
case (_, AZERO) => AZERO
+ − 381
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 382
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 383
}
+ − 384
case AALTS(bs1, rs) => {
+ − 385
val rs_simp = rs.map(strongBsimp(_))
+ − 386
val flat_res = flats(rs_simp)
412
+ − 387
val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
409
+ − 388
dist_res match {
+ − 389
case Nil => AZERO
+ − 390
case s :: Nil => fuse(bs1, s)
+ − 391
case rs => AALTS(bs1, rs)
+ − 392
}
+ − 393
+ − 394
}
+ − 395
case r => r
+ − 396
}
+ − 397
}
+ − 398
403
+ − 399
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 400
case Nil => r
+ − 401
case c::s => bders(s, bder(c, r))
+ − 402
}
+ − 403
+ − 404
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 405
case Nil => Nil
+ − 406
case AZERO :: rs1 => flats(rs1)
+ − 407
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 408
case r1 :: rs2 => r1 :: flats(rs2)
+ − 409
}
+ − 410
+ − 411
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 412
case Nil => Nil
+ − 413
case (x::xs) => {
+ − 414
val res = f(x)
+ − 415
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 416
else x::distinctBy(xs, f, res::acc)
+ − 417
}
+ − 418
}
+ − 419
412
+ − 420
def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
+ − 421
case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
+ − 422
case Nil => Nil
+ − 423
}
+ − 424
+ − 425
+ − 426
def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ − 427
case Nil => Nil
+ − 428
case (x::xs) => {
+ − 429
val res = erase(x)
+ − 430
if(acc.contains(res))
+ − 431
distinctBy2(xs, acc)
+ − 432
else
+ − 433
x match {
+ − 434
case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ − 435
val newTerms = distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+ − 436
val rsPrime = projectFirstChild(newTerms)
+ − 437
newTerms match {
+ − 438
case Nil => distinctBy2(xs, acc)
+ − 439
case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
+ − 440
case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
+ − 441
}
+ − 442
case x => x::distinctBy2(xs, res::acc)
+ − 443
}
+ − 444
}
+ − 445
}
+ − 446
403
+ − 447
def prettyRexp(r: Rexp) : String = r match {
+ − 448
case STAR(r0) => s"${prettyRexp(r0)}*"
+ − 449
case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
+ − 450
case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
+ − 451
case CHAR(c) => c.toString
+ − 452
case ANYCHAR => "."
+ − 453
// case NOT(r0) => s
+ − 454
}
+ − 455
+ − 456
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 457
case (ONE, bs) => (Empty, bs)
+ − 458
case (CHAR(f), bs) => (Chr(f), bs)
+ − 459
case (ALTS(r1, r2), Z::bs1) => {
+ − 460
val (v, bs2) = decode_aux(r1, bs1)
+ − 461
(Left(v), bs2)
+ − 462
}
+ − 463
case (ALTS(r1, r2), S::bs1) => {
+ − 464
val (v, bs2) = decode_aux(r2, bs1)
+ − 465
(Right(v), bs2)
+ − 466
}
+ − 467
case (SEQ(r1, r2), bs) => {
+ − 468
val (v1, bs1) = decode_aux(r1, bs)
+ − 469
val (v2, bs2) = decode_aux(r2, bs1)
+ − 470
(Sequ(v1, v2), bs2)
+ − 471
}
+ − 472
case (STAR(r1), S::bs) => {
+ − 473
val (v, bs1) = decode_aux(r1, bs)
+ − 474
//println(v)
+ − 475
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 476
(Stars(v::vs), bs2)
+ − 477
}
+ − 478
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 479
case (RECD(x, r1), bs) => {
+ − 480
val (v, bs1) = decode_aux(r1, bs)
+ − 481
(Rec(x, v), bs1)
+ − 482
}
+ − 483
case (NOT(r), bs) => (Nots(prettyRexp(r)), bs)
+ − 484
}
+ − 485
+ − 486
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 487
case (v, Nil) => v
+ − 488
case _ => throw new Exception("Not decodable")
+ − 489
}
+ − 490
+ − 491
+ − 492
+ − 493
def blexSimp(r: Rexp, s: String) : List[Bit] = {
+ − 494
blex_simp(internalise(r), s.toList)
+ − 495
}
+ − 496
+ − 497
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 498
val bit_code = blex_simp(internalise(r), s.toList)
+ − 499
decode(r, bit_code)
+ − 500
}
+ − 501
+ − 502
+ − 503
+ − 504
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 505
case Nil => r
+ − 506
case c::s => bders_simp(s, bsimp(bder(c, r)))
+ − 507
}
+ − 508
+ − 509
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 510
412
+ − 511
def bders_simpS(s: List[Char], r: ARexp) : ARexp = s match {
+ − 512
case Nil => r
+ − 513
case c::s => bders_simpS(s, strongBsimp(bder(c, r)))
+ − 514
}
+ − 515
+ − 516
def bdersSimpS(s: String, r: Rexp) : ARexp = bders_simpS(s.toList, internalise(r))
403
+ − 517
+ − 518
def erase(r:ARexp): Rexp = r match{
+ − 519
case AZERO => ZERO
+ − 520
case AONE(_) => ONE
+ − 521
case ACHAR(bs, c) => CHAR(c)
+ − 522
case AALTS(bs, Nil) => ZERO
+ − 523
case AALTS(bs, a::Nil) => erase(a)
+ − 524
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 525
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 526
case ASTAR(cs, r)=> STAR(erase(r))
+ − 527
case ANOT(bs, r) => NOT(erase(r))
+ − 528
case AANYCHAR(bs) => ANYCHAR
+ − 529
}
+ − 530
+ − 531
def breakHead(r: ARexp) : List[ARexp] = r match {
+ − 532
case AALTS(bs, rs) => rs
+ − 533
case r => r::Nil
+ − 534
}
+ − 535
+ − 536
def distinctByWithAcc[B, C](xs: List[B], f: B => C,
+ − 537
acc: List[C] = Nil, accB: List[B] = Nil): (List[B], List[C]) = xs match {
+ − 538
case Nil => (accB.reverse, acc)
+ − 539
case (x::xs) => {
+ − 540
val res = f(x)
+ − 541
if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)
+ − 542
else distinctByWithAcc(xs, f, res::acc, x::accB)
+ − 543
}
+ − 544
}
+ − 545
412
+ − 546
//(r1+r2)r + (r2'+r3)r' ~> (r1+r2)r + (r3)r' (erase r = erase r') (erase r2 = erase r2')
+ − 547
//s = s1@s2 \in L R s1 \in L r2 s2 \in L r
+ − 548
//s = s3@s4 s3 \in L r1 s4 \in L r |s3| < |s1| \nexists s3' s4' s.t. s3'@s4' = s and s3' \in L r1 s4' \in L r
+ − 549
//(r1+r4)r ~> r1r +_usedToBeSeq r4r
+ − 550
// | ss7: "erase a01 = erase a02 ∧ (distinctBy as2 erase (set (map erase as1)) = as2p) ⟹
+ − 551
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) sâ†
+ − 552
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
+ − 553
// | ss8: "erase a01 = erase a02 ∧ (distinctBy [a2] erase (set (map erase as1)) = []) ⟹
+ − 554
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs a2 (ASTAR bs02 a02)]@rsc) sâ†
+ − 555
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@rsc)"
+ − 556
// | ss9: "erase a01 = erase a02 ∧ (distinctBy as2 erase {erase a1} = as2p) ⟹
+ − 557
// (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) sâ†
+ − 558
// (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
403
+ − 559
412
+ − 560
+ − 561
//# of top-level terms
+ − 562
// r1r2 -> r11r2 + r12 -> r21r2 + r22 + r12' -> 4 terms -> 5 terms -> 6..........
+ − 563
// if string long enough --> #| r1r2 \s s | > unbounded? No -> #| r1r2 \s s | < #| pders UNIV r1| + #|pders UNIV r2| <= awidth r1r2
+ − 564
//r* -> r1r* -> r21r* + r22r* -> 4 terms -> 8 terms..............
409
+ − 565
def strongDB(xs: List[ARexp],
403
+ − 566
acc1: List[Rexp] = Nil,
+ − 567
acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
+ − 568
case Nil => Nil
+ − 569
case (x::xs) =>
+ − 570
if(acc1.contains(erase(x)))
409
+ − 571
strongDB(xs, acc1, acc2)
403
+ − 572
else{
+ − 573
x match {
+ − 574
case ASTAR(bs0, r0) =>
+ − 575
val headList : List[ARexp] = List[ARexp](AONE(Nil))
+ − 576
val i = acc2.indexWhere(
+ − 577
r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) }
+ − 578
)
+ − 579
if(i == -1){
409
+ − 580
x::strongDB(
403
+ − 581
xs, erase(x)::acc1, (ONE::Nil, erase(r0))::acc2
+ − 582
)
+ − 583
}
+ − 584
else{
+ − 585
val headListAlready = acc2(i)
+ − 586
val (newHeads, oldHeadsUpdated) =
+ − 587
distinctByWithAcc(headList, erase, headListAlready._1)
+ − 588
newHeads match{
+ − 589
case newHead::Nil =>
+ − 590
ASTAR(bs0, r0) ::
409
+ − 591
strongDB(xs, erase(x)::acc1,
412
+ − 592
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )
403
+ − 593
case Nil =>
409
+ − 594
strongDB(xs, erase(x)::acc1,
403
+ − 595
acc2)
+ − 596
}
+ − 597
}
+ − 598
case ASEQ(bs, r1, ASTAR(bs0, r0)) =>
+ − 599
val headList = breakHead(r1)
+ − 600
val i = acc2.indexWhere(
+ − 601
r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) }
+ − 602
)
+ − 603
if(i == -1){
409
+ − 604
x::strongDB(
403
+ − 605
xs, erase(x)::acc1, (headList.map(erase(_)), erase(r0))::acc2
+ − 606
)
+ − 607
}
+ − 608
else{
+ − 609
val headListAlready = acc2(i)
+ − 610
val (newHeads, oldHeadsUpdated) =
+ − 611
distinctByWithAcc(headList, erase, headListAlready._1)
+ − 612
newHeads match{
+ − 613
case newHead::Nil =>
+ − 614
ASEQ(bs, newHead, ASTAR(bs0, r0)) ::
409
+ − 615
strongDB(xs, erase(x)::acc1,
403
+ − 616
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
+ − 617
case Nil =>
409
+ − 618
strongDB(xs, erase(x)::acc1,
403
+ − 619
acc2)
+ − 620
case hds => val AALTS(bsp, rsp) = r1
+ − 621
ASEQ(bs, AALTS(bsp, hds), ASTAR(bs0, r0)) ::
409
+ − 622
strongDB(xs, erase(x)::acc1,
403
+ − 623
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)))
+ − 624
}
+ − 625
}
409
+ − 626
case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)
403
+ − 627
}
+ − 628
+ − 629
}
+ − 630
+ − 631
}
+ − 632
412
+ − 633
def shortRexpOutput(r: Rexp) : String = r match {
+ − 634
case CHAR(c) => c.toString
+ − 635
case ONE => "1"
+ − 636
case ZERO => "0"
+ − 637
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 638
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 639
case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
+ − 640
//case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 641
//case RTOP => "RTOP"
+ − 642
}
+ − 643
403
+ − 644
+ − 645
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 646
case Nil => {
+ − 647
if (bnullable(r)) {
+ − 648
//println(asize(r))
+ − 649
val bits = mkepsBC(r)
+ − 650
bits
+ − 651
}
+ − 652
else throw new Exception("Not matched")
+ − 653
}
+ − 654
case c::cs => {
+ − 655
val der_res = bder(c,r)
+ − 656
val simp_res = bsimp(der_res)
+ − 657
blex_simp(simp_res, cs)
+ − 658
}
+ − 659
}
+ − 660
def size(r: Rexp) : Int = r match {
+ − 661
case ZERO => 1
+ − 662
case ONE => 1
+ − 663
case CHAR(_) => 1
+ − 664
case ANYCHAR => 1
+ − 665
case NOT(r0) => 1 + size(r0)
+ − 666
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 667
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 668
case STAR(r) => 1 + size(r)
+ − 669
}
+ − 670
+ − 671
def asize(a: ARexp) = size(erase(a))
+ − 672
412
+ − 673
//pder related
+ − 674
type Mon = (Char, Rexp)
+ − 675
type Lin = Set[Mon]
+ − 676
+ − 677
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 678
case ZERO => Set()
+ − 679
case ONE => rs
+ − 680
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 681
}
+ − 682
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 683
case ZERO => Set()
+ − 684
case ONE => l
+ − 685
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 686
}
+ − 687
def lf(r: Rexp): Lin = r match {
+ − 688
case ZERO => Set()
+ − 689
case ONE => Set()
+ − 690
case CHAR(f) => {
+ − 691
//val Some(c) = alphabet.find(f)
+ − 692
Set((f, ONE))
+ − 693
}
+ − 694
case ALTS(r1, r2) => {
+ − 695
lf(r1 ) ++ lf(r2)
+ − 696
}
+ − 697
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 698
case SEQ(r1, r2) =>{
+ − 699
if (nullable(r1))
+ − 700
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 701
else
+ − 702
cir_prod(lf(r1), r2)
+ − 703
}
+ − 704
}
+ − 705
def lfs(r: Set[Rexp]): Lin = {
+ − 706
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 707
}
+ − 708
+ − 709
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 710
val lft = lf(t)
+ − 711
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 712
}
+ − 713
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 714
case x::xs => pders(xs, pder(x, t))
+ − 715
case Nil => Set(t)
+ − 716
}
+ − 717
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 718
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 719
case Nil => ts
+ − 720
}
+ − 721
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 722
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 723
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 724
//because they don't include the original regular term before they are pdered.
+ − 725
//now only pderas is fixed.
+ − 726
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 727
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r))
+ − 728
def awidth(r: Rexp) : Int = r match {
+ − 729
case CHAR(c) => 1
+ − 730
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 731
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 732
case ONE => 0
+ − 733
case ZERO => 0
+ − 734
case STAR(r) => awidth(r)
+ − 735
}
+ − 736
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 737
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 738
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 739
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 740
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 741
case ZERO => Set[Rexp]()
+ − 742
case ONE => Set[Rexp]()
+ − 743
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 744
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 745
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 746
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 747
}
+ − 748
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 749
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 750
case Nil => ts
+ − 751
}
+ − 752
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 753
+ − 754
403
+ − 755
+ − 756
// @arg(doc = "small tests")
412
+ − 757
val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).%
+ − 758
//(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.%
403
+ − 759
+ − 760
@main
+ − 761
def small() = {
+ − 762
412
+ − 763
403
+ − 764
// println(lexing_simp(NOTREG, prog0))
+ − 765
// val v = lex_simp(NOTREG, prog0.toList)
+ − 766
// println(v)
+ − 767
+ − 768
// val d = (lex_simp(NOTREG, prog0.toList))
+ − 769
// println(d)
412
+ − 770
val pderSTAR = pderUNIV(STARREG)
+ − 771
val refSize = pderSTAR.map(size(_)).sum
+ − 772
println(refSize)
+ − 773
for(i <- 10 to 100){
+ − 774
val prog0 = "a" * i
+ − 775
println(s"test: $prog0")
+ − 776
val bd = bdersSimp(prog0, STARREG)//DB
+ − 777
val sbd = bdersSimpS(prog0, STARREG)//strongDB
403
+ − 778
412
+ − 779
+ − 780
// println(shortRexpOutput(erase(sbd)))
+ − 781
// println(shortRexpOutput(erase(bd)))
+ − 782
println("pdersize, original, strongSimp, simp")
+ − 783
println(refSize, size(STARREG), asize(sbd), asize(bd))
403
+ − 784
+ − 785
val vres = blexing_simp( STARREG, prog0)
+ − 786
println(vres)
412
+ − 787
}
403
+ − 788
// println(vs.length)
+ − 789
// println(vs)
+ − 790
+ − 791
+ − 792
// val prog1 = """read n; write n"""
+ − 793
// println(s"test: $prog1")
+ − 794
// println(lexing_simp(WHILE_REGS, prog1))
+ − 795
}
+ − 796
+ − 797
// // Bigger Tests
+ − 798
// //==============
+ − 799
+ − 800
// // escapes strings and prints them out as "", "\n" and so on
+ − 801
// def esc(raw: String): String = {
+ − 802
// import scala.reflect.runtime.universe._
+ − 803
// Literal(Constant(raw)).toString
+ − 804
// }
+ − 805
+ − 806
// def escape(tks: List[(String, String)]) =
+ − 807
// tks.map{ case (s1, s2) => (s1, esc(s2))}
+ − 808
+ − 809
+ − 810
// val prog2 = """
+ − 811
// write "Fib";
+ − 812
// read n;
+ − 813
// minus1 := 0;
+ − 814
// minus2 := 1;
+ − 815
// while n > 0 do {
+ − 816
// temp := minus2;
+ − 817
// minus2 := minus1 + minus2;
+ − 818
// minus1 := temp;
+ − 819
// n := n - 1
+ − 820
// };
+ − 821
// write "Result";
+ − 822
// write minus2
+ − 823
// """
+ − 824
+ − 825
// @arg(doc = "Fibonacci test")
+ − 826
// @main
+ − 827
// def fib() = {
+ − 828
// println("lexing fib program")
+ − 829
// println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
+ − 830
// }
+ − 831
+ − 832
+ − 833
// val prog3 = """
+ − 834
// start := 1000;
+ − 835
// x := start;
+ − 836
// y := start;
+ − 837
// z := start;
+ − 838
// while 0 < x do {
+ − 839
// while 0 < y do {
+ − 840
// while 0 < z do {
+ − 841
// z := z - 1
+ − 842
// };
+ − 843
// z := start;
+ − 844
// y := y - 1
+ − 845
// };
+ − 846
// y := start;
+ − 847
// x := x - 1
+ − 848
// }
+ − 849
// """
+ − 850
+ − 851
// @arg(doc = "Loops test")
+ − 852
// @main
+ − 853
// def loops() = {
+ − 854
// println("lexing Loops")
+ − 855
// println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
+ − 856
// }
+ − 857
+ − 858
// @arg(doc = "Email Test")
+ − 859
// @main
+ − 860
// def email() = {
+ − 861
// val lower = "abcdefghijklmnopqrstuvwxyz"
+ − 862
+ − 863
// val NAME = RECD("name", PLUS(RANGE(lower ++ "_.-")))
+ − 864
// val DOMAIN = RECD("domain", PLUS(RANGE(lower ++ "-")))
+ − 865
// val RE = RANGE(lower ++ ".")
+ − 866
// val TOPLEVEL = RECD("top", (RE ~ RE) |
+ − 867
// (RE ~ RE ~ RE) |
+ − 868
// (RE ~ RE ~ RE ~ RE) |
+ − 869
// (RE ~ RE ~ RE ~ RE ~ RE) |
+ − 870
// (RE ~ RE ~ RE ~ RE ~ RE ~ RE))
+ − 871
+ − 872
// val EMAIL = NAME ~ "@" ~ DOMAIN ~ "." ~ TOPLEVEL
+ − 873
+ − 874
// println(lexing_simp(EMAIL, "christian.urban@kcl.ac.uk"))
+ − 875
// }
+ − 876
+ − 877
+ − 878
// @arg(doc = "All tests.")
+ − 879
// @main
+ − 880
// def all() = { small(); fib() ; loops() ; email() }
+ − 881
+ − 882
+ − 883
+ − 884
+ − 885
// runs with amm2 and amm3
+ − 886
+ − 887
+ − 888