431
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
+ − 13
+ − 14
// regular expressions including records
+ − 15
abstract class Rexp
+ − 16
case object ZERO extends Rexp
+ − 17
case object ONE extends Rexp
+ − 18
case object ANYCHAR extends Rexp
+ − 19
case class CHAR(c: Char) extends Rexp
+ − 20
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 21
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class STAR(r: Rexp) extends Rexp
+ − 23
case class RECD(x: String, r: Rexp) extends Rexp
+ − 24
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 25
case class OPTIONAL(r: Rexp) extends Rexp
+ − 26
case class NOT(r: Rexp) extends Rexp
+ − 27
// records for extracting strings or tokens
+ − 28
+ − 29
// values
+ − 30
abstract class Val
+ − 31
case object Empty extends Val
+ − 32
case class Chr(c: Char) extends Val
+ − 33
case class Sequ(v1: Val, v2: Val) extends Val
+ − 34
case class Left(v: Val) extends Val
+ − 35
case class Right(v: Val) extends Val
+ − 36
case class Stars(vs: List[Val]) extends Val
+ − 37
case class Rec(x: String, v: Val) extends Val
+ − 38
case class Ntime(vs: List[Val]) extends Val
+ − 39
case class Optionall(v: Val) extends Val
+ − 40
case class Nots(s: String) extends Val
+ − 41
+ − 42
+ − 43
+ − 44
abstract class Bit
+ − 45
case object Z extends Bit
+ − 46
case object S extends Bit
+ − 47
+ − 48
+ − 49
type Bits = List[Bit]
+ − 50
+ − 51
abstract class ARexp
+ − 52
case object AZERO extends ARexp
+ − 53
case class AONE(bs: Bits) extends ARexp
+ − 54
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 55
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 56
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 57
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 58
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 59
case class AANYCHAR(bs: Bits) extends ARexp
+ − 60
+ − 61
+ − 62
+ − 63
// some convenience for typing in regular expressions
+ − 64
+ − 65
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 66
case Nil => ONE
+ − 67
case c::Nil => CHAR(c)
+ − 68
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 69
}
+ − 70
implicit def string2rexp(s : String) : Rexp =
+ − 71
charlist2rexp(s.toList)
+ − 72
+ − 73
implicit def RexpOps(r: Rexp) = new {
+ − 74
def | (s: Rexp) = ALTS(r, s)
+ − 75
def % = STAR(r)
+ − 76
def ~ (s: Rexp) = SEQ(r, s)
+ − 77
}
+ − 78
+ − 79
implicit def stringOps(s: String) = new {
+ − 80
def | (r: Rexp) = ALTS(s, r)
+ − 81
def | (r: String) = ALTS(s, r)
+ − 82
def % = STAR(s)
+ − 83
def ~ (r: Rexp) = SEQ(s, r)
+ − 84
def ~ (r: String) = SEQ(s, r)
+ − 85
def $ (r: Rexp) = RECD(s, r)
+ − 86
}
+ − 87
+ − 88
def nullable(r: Rexp) : Boolean = r match {
+ − 89
case ZERO => false
+ − 90
case ONE => true
+ − 91
case CHAR(_) => false
+ − 92
case ANYCHAR => false
+ − 93
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 94
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 95
case STAR(_) => true
+ − 96
case RECD(_, r1) => nullable(r1)
+ − 97
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 98
case OPTIONAL(r) => true
+ − 99
case NOT(r) => !nullable(r)
+ − 100
}
+ − 101
+ − 102
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 103
case ZERO => ZERO
+ − 104
case ONE => ZERO
+ − 105
case CHAR(d) => if (c == d) ONE else ZERO
+ − 106
case ANYCHAR => ONE
+ − 107
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 108
case SEQ(r1, r2) =>
+ − 109
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 110
else SEQ(der(c, r1), r2)
+ − 111
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 112
case RECD(_, r1) => der(c, r1)
+ − 113
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 114
case OPTIONAL(r) => der(c, r)
+ − 115
case NOT(r) => NOT(der(c, r))
+ − 116
}
+ − 117
+ − 118
+ − 119
// extracts a string from a value
+ − 120
def flatten(v: Val) : String = v match {
+ − 121
case Empty => ""
+ − 122
case Chr(c) => c.toString
+ − 123
case Left(v) => flatten(v)
+ − 124
case Right(v) => flatten(v)
+ − 125
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 126
case Stars(vs) => vs.map(flatten).mkString
+ − 127
case Ntime(vs) => vs.map(flatten).mkString
+ − 128
case Optionall(v) => flatten(v)
+ − 129
case Rec(_, v) => flatten(v)
+ − 130
}
+ − 131
+ − 132
+ − 133
// extracts an environment from a value;
+ − 134
// used for tokenising a string
+ − 135
def env(v: Val) : List[(String, String)] = v match {
+ − 136
case Empty => Nil
+ − 137
case Chr(c) => Nil
+ − 138
case Left(v) => env(v)
+ − 139
case Right(v) => env(v)
+ − 140
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 141
case Stars(vs) => vs.flatMap(env)
+ − 142
case Ntime(vs) => vs.flatMap(env)
+ − 143
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 144
case Optionall(v) => env(v)
+ − 145
case Nots(s) => ("Negative", s) :: Nil
+ − 146
}
+ − 147
+ − 148
+ − 149
// The injection and mkeps part of the lexer
+ − 150
//===========================================
+ − 151
+ − 152
def mkeps(r: Rexp) : Val = r match {
+ − 153
case ONE => Empty
+ − 154
case ALTS(r1, r2) =>
+ − 155
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 156
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 157
case STAR(r) => Stars(Nil)
+ − 158
case RECD(x, r) => Rec(x, mkeps(r))
+ − 159
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 160
case OPTIONAL(r) => Optionall(Empty)
+ − 161
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 162
else Nots("")//Nots(s.reverse.toString)
+ − 163
// case NOT(ZERO) => Empty
+ − 164
// case NOT(CHAR(c)) => Empty
+ − 165
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 166
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 167
// case NOT(STAR(r)) => Stars(Nil)
+ − 168
+ − 169
}
+ − 170
+ − 171
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 172
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 173
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 174
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 175
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 176
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 177
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 178
case (CHAR(d), Empty) => Chr(c)
+ − 179
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 180
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 181
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 182
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 183
case (ANYCHAR, Empty) => Chr(c)
+ − 184
}
+ − 185
+ − 186
// some "rectification" functions for simplification
+ − 187
def F_ID(v: Val): Val = v
+ − 188
def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+ − 189
def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+ − 190
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 191
case Right(v) => Right(f2(v))
+ − 192
case Left(v) => Left(f1(v))
+ − 193
}
+ − 194
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 195
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+ − 196
}
+ − 197
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+ − 198
(v:Val) => Sequ(f1(Empty), f2(v))
+ − 199
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+ − 200
(v:Val) => Sequ(f1(v), f2(Empty))
+ − 201
+ − 202
def F_ERROR(v: Val): Val = throw new Exception("error")
+ − 203
+ − 204
// simplification
+ − 205
def simp(r: Rexp): (Rexp, Val => Val) = r match {
+ − 206
case ALTS(r1, r2) => {
+ − 207
val (r1s, f1s) = simp(r1)
+ − 208
val (r2s, f2s) = simp(r2)
+ − 209
(r1s, r2s) match {
+ − 210
case (ZERO, _) => (r2s, F_RIGHT(f2s))
+ − 211
case (_, ZERO) => (r1s, F_LEFT(f1s))
+ − 212
case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+ − 213
else (ALTS (r1s, r2s), F_ALT(f1s, f2s))
+ − 214
}
+ − 215
}
+ − 216
case SEQ(r1, r2) => {
+ − 217
val (r1s, f1s) = simp(r1)
+ − 218
val (r2s, f2s) = simp(r2)
+ − 219
(r1s, r2s) match {
+ − 220
case (ZERO, _) => (ZERO, F_ERROR)
+ − 221
case (_, ZERO) => (ZERO, F_ERROR)
+ − 222
case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+ − 223
case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+ − 224
case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+ − 225
}
+ − 226
}
+ − 227
case r => (r, F_ID)
+ − 228
}
+ − 229
+ − 230
// lexing functions including simplification
+ − 231
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+ − 232
case Nil => if (nullable(r)) mkeps(r) else
+ − 233
{ throw new Exception(s"lexing error $r not nullable") }
+ − 234
case c::cs => {
+ − 235
val (r_simp, f_simp) = simp(der(c, r))
+ − 236
inj(r, c, f_simp(lex_simp(r_simp, cs)))
+ − 237
}
+ − 238
}
+ − 239
+ − 240
def lexing_simp(r: Rexp, s: String) =
+ − 241
env(lex_simp(r, s.toList))
+ − 242
+ − 243
// The Lexing Rules for the WHILE Language
+ − 244
+ − 245
def PLUS(r: Rexp) = r ~ r.%
+ − 246
+ − 247
def Range(s : List[Char]) : Rexp = s match {
+ − 248
case Nil => ZERO
+ − 249
case c::Nil => CHAR(c)
+ − 250
case c::s => ALTS(CHAR(c), Range(s))
+ − 251
}
+ − 252
def RANGE(s: String) = Range(s.toList)
+ − 253
+ − 254
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+ − 255
val DIGIT = RANGE("0123456789")
+ − 256
val ID = SYM ~ (SYM | DIGIT).%
+ − 257
val NUM = PLUS(DIGIT)
+ − 258
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+ − 259
val SEMI: Rexp = ";"
+ − 260
val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+ − 261
val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
+ − 262
val RPAREN: Rexp = "{"
+ − 263
val LPAREN: Rexp = "}"
+ − 264
val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+ − 265
+ − 266
+ − 267
//ab \ a --> 1b
+ − 268
//
+ − 269
val WHILE_REGS = (("k" $ KEYWORD) |
+ − 270
("i" $ ID) |
+ − 271
("o" $ OP) |
+ − 272
("n" $ NUM) |
+ − 273
("s" $ SEMI) |
+ − 274
("str" $ STRING) |
+ − 275
("p" $ (LPAREN | RPAREN)) |
+ − 276
("w" $ WHITESPACE)).%
+ − 277
+ − 278
val NREGS = NTIMES(5, OPTIONAL(SYM))
+ − 279
val NREGS1 = ("test" $ NREGS)
+ − 280
// Two Simple While Tests
+ − 281
//========================
+ − 282
val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))
+ − 283
+ − 284
+ − 285
// bnullable function: tests whether the aregular
+ − 286
// expression can recognise the empty string
+ − 287
def bnullable (r: ARexp) : Boolean = r match {
+ − 288
case AZERO => false
+ − 289
case AONE(_) => true
+ − 290
case ACHAR(_,_) => false
+ − 291
case AALTS(_, rs) => rs.exists(bnullable)
+ − 292
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 293
case ASTAR(_, _) => true
+ − 294
case ANOT(_, rn) => !bnullable(rn)
+ − 295
}
+ − 296
+ − 297
def mkepsBC(r: ARexp) : Bits = r match {
+ − 298
case AONE(bs) => bs
+ − 299
case AALTS(bs, rs) => {
+ − 300
val n = rs.indexWhere(bnullable)
+ − 301
bs ++ mkepsBC(rs(n))
+ − 302
}
+ − 303
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 304
case ASTAR(bs, r) => bs ++ List(Z)
+ − 305
case ANOT(bs, rn) => bs
+ − 306
}
+ − 307
+ − 308
+ − 309
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 310
case AZERO => AZERO
+ − 311
case AONE(_) => AZERO
+ − 312
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 313
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 314
case ASEQ(bs, r1, r2) =>
+ − 315
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 316
else ASEQ(bs, bder(c, r1), r2)
+ − 317
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 318
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 319
case AANYCHAR(bs) => AONE(bs)
+ − 320
}
+ − 321
+ − 322
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 323
case AZERO => AZERO
+ − 324
case AONE(cs) => AONE(bs ++ cs)
+ − 325
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 326
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 327
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 328
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 329
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 330
}
+ − 331
+ − 332
+ − 333
def internalise(r: Rexp) : ARexp = r match {
+ − 334
case ZERO => AZERO
+ − 335
case ONE => AONE(Nil)
+ − 336
case CHAR(c) => ACHAR(Nil, c)
+ − 337
//case PRED(f) => APRED(Nil, f)
+ − 338
case ALTS(r1, r2) =>
+ − 339
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 340
// case ALTS(r1::rs) => {
+ − 341
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 342
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 343
// }
+ − 344
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 345
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 346
case RECD(x, r) => internalise(r)
+ − 347
case NOT(r) => ANOT(Nil, internalise(r))
+ − 348
case ANYCHAR => AANYCHAR(Nil)
+ − 349
}
+ − 350
+ − 351
+ − 352
def bsimp(r: ARexp): ARexp =
+ − 353
{
+ − 354
r match {
+ − 355
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 356
case (AZERO, _) => AZERO
+ − 357
case (_, AZERO) => AZERO
+ − 358
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 359
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 360
}
+ − 361
case AALTS(bs1, rs) => {
+ − 362
val rs_simp = rs.map(bsimp(_))
+ − 363
val flat_res = flats(rs_simp)
+ − 364
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 365
dist_res match {
+ − 366
case Nil => AZERO
+ − 367
case s :: Nil => fuse(bs1, s)
+ − 368
case rs => AALTS(bs1, rs)
+ − 369
}
+ − 370
+ − 371
}
+ − 372
case r => r
+ − 373
}
+ − 374
}
+ − 375
def strongBsimp(r: ARexp): ARexp =
+ − 376
{
+ − 377
r match {
+ − 378
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 379
case (AZERO, _) => AZERO
+ − 380
case (_, AZERO) => AZERO
+ − 381
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 382
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 383
}
+ − 384
case AALTS(bs1, rs) => {
+ − 385
val rs_simp = rs.map(strongBsimp(_))
+ − 386
val flat_res = flats(rs_simp)
+ − 387
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 388
dist_res match {
+ − 389
case Nil => AZERO
+ − 390
case s :: Nil => fuse(bs1, s)
+ − 391
case rs => AALTS(bs1, rs)
+ − 392
}
+ − 393
+ − 394
}
+ − 395
case r => r
+ − 396
}
+ − 397
}
+ − 398
+ − 399
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 400
case Nil => r
+ − 401
case c::s => bders(s, bder(c, r))
+ − 402
}
+ − 403
+ − 404
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 405
case Nil => Nil
+ − 406
case AZERO :: rs1 => flats(rs1)
+ − 407
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 408
case r1 :: rs2 => r1 :: flats(rs2)
+ − 409
}
+ − 410
+ − 411
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 412
case Nil => Nil
+ − 413
case (x::xs) => {
+ − 414
val res = f(x)
+ − 415
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 416
else x::distinctBy(xs, f, res::acc)
+ − 417
}
+ − 418
}
+ − 419
+ − 420
def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
+ − 421
case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
+ − 422
case Nil => Nil
+ − 423
}
+ − 424
+ − 425
+ − 426
def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ − 427
case Nil => Nil
+ − 428
case (x::xs) => {
+ − 429
val res = erase(x)
+ − 430
if(acc.contains(res))
+ − 431
distinctBy2(xs, acc)
+ − 432
else
+ − 433
x match {
+ − 434
case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ − 435
val newTerms = distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+ − 436
val rsPrime = projectFirstChild(newTerms)
+ − 437
newTerms match {
+ − 438
case Nil => distinctBy2(xs, acc)
+ − 439
case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
+ − 440
case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
+ − 441
}
+ − 442
case x => x::distinctBy2(xs, res::acc)
+ − 443
}
+ − 444
}
+ − 445
}
+ − 446
+ − 447
def deepFlts(r: ARexp): List[ARexp] = r match{
+ − 448
+ − 449
case ASEQ(bs, r1, r2) => deepFlts(r1).map(r1p => ASEQ(bs, r1p, r2))
+ − 450
case ASTAR(bs, r0) => List(r)
+ − 451
case ACHAR(_, _) => List(r)
+ − 452
case AALTS(bs, rs) => rs.flatMap(deepFlts(_))//throw new Error("doubly nested alts in bsimp r")
+ − 453
}
+ − 454
+ − 455
+ − 456
def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ − 457
case Nil => Nil
+ − 458
case (x::xs) => {
+ − 459
val res = erase(x)
+ − 460
if(acc.contains(res))
+ − 461
distinctBy3(xs, acc)
+ − 462
else
+ − 463
x match {
+ − 464
case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ − 465
val newTerms = distinctBy3(rs.flatMap(deepFlts(_)).map(r1 => ASEQ(Nil, r1, r2)), acc)//deepFlts(rs.flatMap(r1 => ASEQ(Nil, r1, r2)), acc)
+ − 466
val rsPrime = projectFirstChild(newTerms)
+ − 467
newTerms match {
+ − 468
case Nil => distinctBy3(xs, acc)
+ − 469
case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
+ − 470
case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
+ − 471
}
+ − 472
case x => x::distinctBy3(xs, res::acc)
+ − 473
}
+ − 474
}
+ − 475
}
+ − 476
+ − 477
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 478
r match {
+ − 479
case ASEQ(bs, r1, r2) =>
+ − 480
val termsTruncated = allowableTerms.collect(rt => rt match {
432
+ − 481
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
431
+ − 482
})
+ − 483
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 484
pruned match {
+ − 485
case AZERO => AZERO
+ − 486
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 487
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 488
}
+ − 489
+ − 490
case AALTS(bs, rs) =>
+ − 491
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
432
+ − 492
val rsp = rs.map(r =>
+ − 493
pruneRexp(r, allowableTerms)
+ − 494
)
+ − 495
.filter(r => r != AZERO)
431
+ − 496
rsp match {
+ − 497
case Nil => AZERO
+ − 498
case r1::Nil => fuse(bs, r1)
+ − 499
case rs1 => AALTS(bs, rs1)
+ − 500
}
+ − 501
case r =>
+ − 502
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 503
}
+ − 504
}
+ − 505
+ − 506
def oneSimp(r: Rexp) : Rexp = r match {
+ − 507
case SEQ(ONE, r) => r
+ − 508
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 509
case r => r//assert r != 0
+ − 510
+ − 511
}
+ − 512
+ − 513
+ − 514
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
432
+ − 515
431
+ − 516
case Nil => Nil
+ − 517
case x :: xs =>
432
+ − 518
//assert(acc.distinct == acc)
431
+ − 519
val erased = erase(x)
+ − 520
if(acc.contains(erased))
+ − 521
distinctBy4(xs, acc)
+ − 522
else{
+ − 523
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
432
+ − 524
//val xp = pruneRexp(x, addToAcc)
+ − 525
pruneRexp(x, addToAcc) match {
431
+ − 526
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 527
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 528
}
+ − 529
+ − 530
}
+ − 531
}
+ − 532
+ − 533
+ − 534
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 535
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 536
// breakIntoTerms(r1).map(r11 => r11 match {
+ − 537
// case ONE => r2
+ − 538
// case r => SEQ(r11, r2)
+ − 539
// }
+ − 540
//)
+ − 541
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ − 542
case _ => r::Nil
+ − 543
}
+ − 544
+ − 545
def prettyRexp(r: Rexp) : String = r match {
+ − 546
case STAR(r0) => s"${prettyRexp(r0)}*"
+ − 547
case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
+ − 548
case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
+ − 549
case CHAR(c) => c.toString
+ − 550
case ANYCHAR => "."
+ − 551
// case NOT(r0) => s
+ − 552
}
+ − 553
+ − 554
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 555
case (ONE, bs) => (Empty, bs)
+ − 556
case (CHAR(f), bs) => (Chr(f), bs)
+ − 557
case (ALTS(r1, r2), Z::bs1) => {
+ − 558
val (v, bs2) = decode_aux(r1, bs1)
+ − 559
(Left(v), bs2)
+ − 560
}
+ − 561
case (ALTS(r1, r2), S::bs1) => {
+ − 562
val (v, bs2) = decode_aux(r2, bs1)
+ − 563
(Right(v), bs2)
+ − 564
}
+ − 565
case (SEQ(r1, r2), bs) => {
+ − 566
val (v1, bs1) = decode_aux(r1, bs)
+ − 567
val (v2, bs2) = decode_aux(r2, bs1)
+ − 568
(Sequ(v1, v2), bs2)
+ − 569
}
+ − 570
case (STAR(r1), S::bs) => {
+ − 571
val (v, bs1) = decode_aux(r1, bs)
+ − 572
//println(v)
+ − 573
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 574
(Stars(v::vs), bs2)
+ − 575
}
+ − 576
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 577
case (RECD(x, r1), bs) => {
+ − 578
val (v, bs1) = decode_aux(r1, bs)
+ − 579
(Rec(x, v), bs1)
+ − 580
}
+ − 581
case (NOT(r), bs) => (Nots(prettyRexp(r)), bs)
+ − 582
}
+ − 583
+ − 584
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 585
case (v, Nil) => v
+ − 586
case _ => throw new Exception("Not decodable")
+ − 587
}
+ − 588
+ − 589
+ − 590
+ − 591
def blexSimp(r: Rexp, s: String) : List[Bit] = {
+ − 592
blex_simp(internalise(r), s.toList)
+ − 593
}
+ − 594
+ − 595
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 596
val bit_code = blex_simp(internalise(r), s.toList)
+ − 597
decode(r, bit_code)
+ − 598
}
+ − 599
+ − 600
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 601
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 602
}
+ − 603
+ − 604
def strong_blex_simp(r: ARexp, s: List[Char]) :Bits = s match {
+ − 605
case Nil => {
+ − 606
if (bnullable(r)) {
+ − 607
//println(asize(r))
+ − 608
val bits = mkepsBC(r)
+ − 609
bits
+ − 610
}
+ − 611
else
+ − 612
throw new Exception("Not matched")
+ − 613
}
+ − 614
case c::cs => {
+ − 615
val der_res = bder(c,r)
+ − 616
val simp_res = strongBsimp(der_res)
+ − 617
strong_blex_simp(simp_res, cs)
+ − 618
}
+ − 619
}
+ − 620
+ − 621
+ − 622
+ − 623
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 624
case Nil => r
435
+ − 625
case c::s =>
+ − 626
println(erase(r))
+ − 627
bders_simp(s, bsimp(bder(c, r)))
431
+ − 628
}
+ − 629
+ − 630
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 631
+ − 632
def bders_simpS(s: List[Char], r: ARexp) : ARexp = s match {
+ − 633
case Nil => r
+ − 634
case c::s => bders_simpS(s, strongBsimp(bder(c, r)))
+ − 635
}
+ − 636
+ − 637
def bdersSimpS(s: String, r: Rexp) : ARexp = bders_simpS(s.toList, internalise(r))
+ − 638
+ − 639
def erase(r:ARexp): Rexp = r match{
+ − 640
case AZERO => ZERO
+ − 641
case AONE(_) => ONE
+ − 642
case ACHAR(bs, c) => CHAR(c)
+ − 643
case AALTS(bs, Nil) => ZERO
+ − 644
case AALTS(bs, a::Nil) => erase(a)
+ − 645
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 646
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 647
case ASTAR(cs, r)=> STAR(erase(r))
+ − 648
case ANOT(bs, r) => NOT(erase(r))
+ − 649
case AANYCHAR(bs) => ANYCHAR
+ − 650
}
+ − 651
+ − 652
+ − 653
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 654
case CHAR(c) => true
+ − 655
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 656
case _ => false
+ − 657
}
+ − 658
+ − 659
def flattenSeq(r: Rexp) : String = r match {
+ − 660
case CHAR(c) => c.toString
+ − 661
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 662
case _ => throw new Error("flatten unflattenable rexp")
+ − 663
}
+ − 664
+ − 665
def shortRexpOutput(r: Rexp) : String = r match {
+ − 666
case CHAR(c) => c.toString
+ − 667
case ONE => "1"
+ − 668
case ZERO => "0"
+ − 669
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 670
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 671
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 672
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 673
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 674
//case RTOP => "RTOP"
+ − 675
}
+ − 676
+ − 677
+ − 678
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 679
case Nil => {
+ − 680
if (bnullable(r)) {
+ − 681
//println(asize(r))
+ − 682
val bits = mkepsBC(r)
+ − 683
bits
+ − 684
}
+ − 685
else
+ − 686
throw new Exception("Not matched")
+ − 687
}
+ − 688
case c::cs => {
+ − 689
val der_res = bder(c,r)
+ − 690
val simp_res = bsimp(der_res)
+ − 691
blex_simp(simp_res, cs)
+ − 692
}
+ − 693
}
+ − 694
def size(r: Rexp) : Int = r match {
+ − 695
case ZERO => 1
+ − 696
case ONE => 1
+ − 697
case CHAR(_) => 1
+ − 698
case ANYCHAR => 1
+ − 699
case NOT(r0) => 1 + size(r0)
+ − 700
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 701
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 702
case STAR(r) => 1 + size(r)
+ − 703
}
+ − 704
+ − 705
def asize(a: ARexp) = size(erase(a))
+ − 706
+ − 707
//pder related
+ − 708
type Mon = (Char, Rexp)
+ − 709
type Lin = Set[Mon]
+ − 710
+ − 711
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 712
case ZERO => Set()
+ − 713
case ONE => rs
+ − 714
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 715
}
+ − 716
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 717
case ZERO => Set()
+ − 718
case ONE => l
+ − 719
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 720
}
+ − 721
def lf(r: Rexp): Lin = r match {
+ − 722
case ZERO => Set()
+ − 723
case ONE => Set()
+ − 724
case CHAR(f) => {
+ − 725
//val Some(c) = alphabet.find(f)
+ − 726
Set((f, ONE))
+ − 727
}
+ − 728
case ALTS(r1, r2) => {
+ − 729
lf(r1 ) ++ lf(r2)
+ − 730
}
+ − 731
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 732
case SEQ(r1, r2) =>{
+ − 733
if (nullable(r1))
+ − 734
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 735
else
+ − 736
cir_prod(lf(r1), r2)
+ − 737
}
+ − 738
}
+ − 739
def lfs(r: Set[Rexp]): Lin = {
+ − 740
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 741
}
+ − 742
+ − 743
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 744
val lft = lf(t)
+ − 745
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 746
}
+ − 747
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 748
case x::xs => pders(xs, pder(x, t))
+ − 749
case Nil => Set(t)
+ − 750
}
+ − 751
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 752
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 753
case Nil => ts
+ − 754
}
+ − 755
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 756
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 757
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 758
//because they don't include the original regular term before they are pdered.
+ − 759
//now only pderas is fixed.
+ − 760
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 761
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 762
def awidth(r: Rexp) : Int = r match {
+ − 763
case CHAR(c) => 1
+ − 764
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 765
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 766
case ONE => 0
+ − 767
case ZERO => 0
+ − 768
case STAR(r) => awidth(r)
+ − 769
}
+ − 770
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 771
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 772
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 773
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 774
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 775
case ZERO => Set[Rexp]()
+ − 776
case ONE => Set[Rexp]()
+ − 777
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 778
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 779
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 780
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 781
}
+ − 782
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 783
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 784
case Nil => ts
+ − 785
}
+ − 786
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 787
+ − 788
+ − 789
+ − 790
def starPrint(r: Rexp) : Unit = r match {
+ − 791
+ − 792
case SEQ(head, rstar) =>
+ − 793
println(shortRexpOutput(head) ++ "~STARREG")
+ − 794
case STAR(rstar) =>
+ − 795
println("STARREG")
+ − 796
case ALTS(r1, r2) =>
+ − 797
println("(")
+ − 798
starPrint(r1)
+ − 799
println("+")
+ − 800
starPrint(r2)
+ − 801
println(")")
+ − 802
case ZERO => println("0")
+ − 803
+ − 804
}
+ − 805
+ − 806
// @arg(doc = "small tests")
+ − 807
val STARREG = //((( (STAR("aa")) | STAR("aaa") ).%).%)
+ − 808
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 809
+ − 810
// @main
+ − 811
def small() = {
+ − 812
+ − 813
+ − 814
// println(lexing_simp(NOTREG, prog0))
+ − 815
// val v = lex_simp(NOTREG, prog0.toList)
+ − 816
// println(v)
+ − 817
+ − 818
// val d = (lex_simp(NOTREG, prog0.toList))
+ − 819
// println(d)
+ − 820
val pderSTAR = pderUNIV(STARREG)
+ − 821
+ − 822
val refSize = pderSTAR.map(size(_)).sum
435
+ − 823
// println("different partial derivative terms:")
+ − 824
// pderSTAR.foreach(r => r match {
431
+ − 825
435
+ − 826
// case SEQ(head, rstar) =>
+ − 827
// println(shortRexpOutput(head) ++ "~STARREG")
+ − 828
// case STAR(rstar) =>
+ − 829
// println("STARREG")
431
+ − 830
435
+ − 831
// }
+ − 832
// )
+ − 833
// println("the total number of terms is")
+ − 834
// //println(refSize)
+ − 835
// println(pderSTAR.size)
431
+ − 836
432
+ − 837
val A : Rexp= ("c" | (ONE | "b") ~ "d") ~((ONE).%)
+ − 838
val B : Rexp = ((ONE).%)
+ − 839
val C : Rexp = ("d") ~ ((ONE).%)
+ − 840
val PRUNE_REG : Rexp = (C | B | A)
+ − 841
val APRUNE_REG = internalise(PRUNE_REG)
435
+ − 842
// // val program_solution = pruneRexp(APRUNE_REG, breakIntoTerms(PRUNE_REG))
+ − 843
// // println("program executes and gives: as disired!")
+ − 844
// // println(shortRexpOutput(erase(program_solution)))
+ − 845
// val simpedPruneReg = strongBsimp(APRUNE_REG)
+ − 846
+ − 847
// println(shortRexpOutput(erase(simpedPruneReg)))
+ − 848
// for(i <- List(1,2 ) ){// 100, 400, 800, 840, 841, 900
+ − 849
// val prog0 = "a" * i
+ − 850
// //println(s"test: $prog0")
+ − 851
// println(s"testing with $i a's" )
+ − 852
// //val bd = bdersSimp(prog0, STARREG)//DB
+ − 853
// val sbd = bdersSimpS(prog0, STARREG)//strongDB
+ − 854
// starPrint(erase(sbd))
+ − 855
// val subTerms = breakIntoTerms(erase(sbd))
+ − 856
// //val subTermsLarge = breakIntoTerms(erase(bd))
431
+ − 857
435
+ − 858
// println(s"subterms of regex with strongDB: ${subTerms.length}")//, standard DB: ${subTermsLarge.length}")
431
+ − 859
+ − 860
+ − 861
435
+ − 862
// println("the number of distinct subterms for bsimp with strongDB")
+ − 863
// println(subTerms.distinct.size)
+ − 864
// //println(subTermsLarge.distinct.size)
+ − 865
// println("which coincides with the number of PDER terms")
431
+ − 866
+ − 867
435
+ − 868
// // println(shortRexpOutput(erase(sbd)))
+ − 869
// // println(shortRexpOutput(erase(bd)))
431
+ − 870
435
+ − 871
// println("pdersize, original, strongSimp")
+ − 872
// println(refSize, size(STARREG), asize(sbd))
431
+ − 873
435
+ − 874
// val vres = strong_blexing_simp( STARREG, prog0)
+ − 875
// println(vres)
+ − 876
// }
431
+ − 877
// println(vs.length)
+ − 878
// println(vs)
+ − 879
+ − 880
+ − 881
// val prog1 = """read n; write n"""
+ − 882
// println(s"test: $prog1")
+ − 883
// println(lexing_simp(WHILE_REGS, prog1))
435
+ − 884
val display = ("a"| "ab").%
+ − 885
val adisplay = internalise(display)
+ − 886
bders_simp( "aaaaa".toList, adisplay)
431
+ − 887
}
+ − 888
+ − 889
small()