431
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
492
+ − 13
import scala.util.Try
431
+ − 14
+ − 15
// regular expressions including records
+ − 16
abstract class Rexp
+ − 17
case object ZERO extends Rexp
+ − 18
case object ONE extends Rexp
+ − 19
case object ANYCHAR extends Rexp
+ − 20
case class CHAR(c: Char) extends Rexp
+ − 21
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 23
case class STAR(r: Rexp) extends Rexp
+ − 24
case class RECD(x: String, r: Rexp) extends Rexp
+ − 25
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 26
case class OPTIONAL(r: Rexp) extends Rexp
+ − 27
case class NOT(r: Rexp) extends Rexp
+ − 28
// records for extracting strings or tokens
+ − 29
+ − 30
// values
+ − 31
abstract class Val
492
+ − 32
case object Failure extends Val
431
+ − 33
case object Empty extends Val
+ − 34
case class Chr(c: Char) extends Val
+ − 35
case class Sequ(v1: Val, v2: Val) extends Val
+ − 36
case class Left(v: Val) extends Val
+ − 37
case class Right(v: Val) extends Val
+ − 38
case class Stars(vs: List[Val]) extends Val
+ − 39
case class Rec(x: String, v: Val) extends Val
+ − 40
case class Ntime(vs: List[Val]) extends Val
+ − 41
case class Optionall(v: Val) extends Val
+ − 42
case class Nots(s: String) extends Val
+ − 43
+ − 44
+ − 45
+ − 46
abstract class Bit
+ − 47
case object Z extends Bit
+ − 48
case object S extends Bit
+ − 49
+ − 50
+ − 51
type Bits = List[Bit]
+ − 52
+ − 53
abstract class ARexp
+ − 54
case object AZERO extends ARexp
+ − 55
case class AONE(bs: Bits) extends ARexp
+ − 56
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 57
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 58
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 59
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 60
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 61
case class AANYCHAR(bs: Bits) extends ARexp
+ − 62
492
+ − 63
trait Generator[+T] {
+ − 64
self => // an alias for "this"
+ − 65
def generate(): T
+ − 66
+ − 67
def gen(n: Int) : List[T] =
+ − 68
if (n == 0) Nil else self.generate() :: gen(n - 1)
+ − 69
+ − 70
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 71
def generate = f(self.generate())
+ − 72
}
+ − 73
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 74
def generate = f(self.generate()).generate()
+ − 75
}
+ − 76
+ − 77
+ − 78
}
+ − 79
+ − 80
// tests a property according to a given random generator
+ − 81
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 82
for (_ <- 0 until amount) {
+ − 83
val value = r.generate()
+ − 84
assert(pred(value), s"Test failed for: $value")
+ − 85
}
+ − 86
println(s"Test passed $amount times")
+ − 87
}
+ − 88
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 89
for (_ <- 0 until amount) {
+ − 90
val valueR = r.generate()
+ − 91
val valueS = s.generate()
+ − 92
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
+ − 93
}
+ − 94
println(s"Test passed $amount times")
+ − 95
}
+ − 96
+ − 97
// random integers
+ − 98
val integers = new Generator[Int] {
+ − 99
val rand = new java.util.Random
+ − 100
def generate() = rand.nextInt()
+ − 101
}
+ − 102
+ − 103
// random booleans
+ − 104
val booleans = integers.map(_ > 0)
+ − 105
+ − 106
// random integers in the range lo and high
+ − 107
def range(lo: Int, hi: Int): Generator[Int] =
+ − 108
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 109
+ − 110
// random characters
+ − 111
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 112
val chars = chars_range('a', 'z')
+ − 113
+ − 114
+ − 115
def oneOf[T](xs: T*): Generator[T] =
+ − 116
for (idx <- range(0, xs.length)) yield xs(idx)
+ − 117
+ − 118
def single[T](x: T) = new Generator[T] {
+ − 119
def generate() = x
+ − 120
}
+ − 121
+ − 122
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 123
for (x <- t; y <- u) yield (x, y)
+ − 124
+ − 125
// lists
+ − 126
def emptyLists = single(Nil)
+ − 127
+ − 128
def nonEmptyLists : Generator[List[Int]] =
+ − 129
for (head <- integers; tail <- lists) yield head :: tail
+ − 130
+ − 131
def lists: Generator[List[Int]] = for {
+ − 132
kind <- booleans
+ − 133
list <- if (kind) emptyLists else nonEmptyLists
+ − 134
} yield list
+ − 135
+ − 136
def char_list(len: Int): Generator[List[Char]] = {
+ − 137
if(len <= 0) single(Nil)
+ − 138
else{
+ − 139
for {
500
+ − 140
c <- chars_range('a', 'c')
492
+ − 141
tail <- char_list(len - 1)
+ − 142
} yield c :: tail
+ − 143
}
+ − 144
}
+ − 145
+ − 146
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 147
493
+ − 148
def sampleString(r: Rexp) : List[String] = r match {
+ − 149
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 150
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 151
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 152
case ONE => "" :: Nil
+ − 153
case ZERO => Nil
+ − 154
case CHAR(c) => c.toString :: Nil
+ − 155
+ − 156
}
+ − 157
+ − 158
def stringsFromRexp(r: Rexp) : List[String] =
+ − 159
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 160
492
+ − 161
+ − 162
// (simple) binary trees
+ − 163
trait Tree[T]
+ − 164
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 165
case class Leaf[T](x: T) extends Tree[T]
+ − 166
+ − 167
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 168
for (x <- t) yield Leaf(x)
+ − 169
+ − 170
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 171
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 172
+ − 173
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 174
for (kind <- range(0, 2);
+ − 175
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 176
+ − 177
// regular expressions
+ − 178
+ − 179
// generates random leaf-regexes; prefers CHAR-regexes
+ − 180
def leaf_rexp() : Generator[Rexp] =
+ − 181
for (kind <- range(0, 5);
+ − 182
c <- chars_range('a', 'd')) yield
+ − 183
kind match {
+ − 184
case 0 => ZERO
+ − 185
case 1 => ONE
+ − 186
case _ => CHAR(c)
+ − 187
}
+ − 188
+ − 189
// generates random inner regexes with maximum depth d
+ − 190
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 191
for (kind <- range(0, 3);
+ − 192
l <- rexp(d);
+ − 193
r <- rexp(d))
+ − 194
yield kind match {
+ − 195
case 0 => ALTS(l, r)
+ − 196
case 1 => SEQ(l, r)
+ − 197
case 2 => STAR(r)
+ − 198
}
+ − 199
+ − 200
// generates random regexes with maximum depth d;
+ − 201
// prefers inner regexes in 2/3 of the cases
+ − 202
def rexp(d: Int = 100): Generator[Rexp] =
+ − 203
for (kind <- range(0, 3);
+ − 204
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 205
+ − 206
+ − 207
// some test functions for rexps
+ − 208
def height(r: Rexp) : Int = r match {
+ − 209
case ZERO => 1
+ − 210
case ONE => 1
+ − 211
case CHAR(_) => 1
+ − 212
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 213
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 214
case STAR(r) => 1 + height(r)
+ − 215
}
+ − 216
+ − 217
// def size(r: Rexp) : Int = r match {
+ − 218
// case ZERO => 1
+ − 219
// case ONE => 1
+ − 220
// case CHAR(_) => 1
+ − 221
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 222
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 223
// case STAR(r) => 1 + size(r)
+ − 224
// }
+ − 225
+ − 226
// randomly subtracts 1 or 2 from the STAR case
+ − 227
def size_faulty(r: Rexp) : Int = r match {
+ − 228
case ZERO => 1
+ − 229
case ONE => 1
+ − 230
case CHAR(_) => 1
+ − 231
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 232
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 233
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 234
}
+ − 235
431
+ − 236
+ − 237
+ − 238
// some convenience for typing in regular expressions
+ − 239
+ − 240
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 241
case Nil => ONE
+ − 242
case c::Nil => CHAR(c)
+ − 243
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 244
}
+ − 245
implicit def string2rexp(s : String) : Rexp =
+ − 246
charlist2rexp(s.toList)
+ − 247
+ − 248
implicit def RexpOps(r: Rexp) = new {
+ − 249
def | (s: Rexp) = ALTS(r, s)
+ − 250
def % = STAR(r)
+ − 251
def ~ (s: Rexp) = SEQ(r, s)
+ − 252
}
+ − 253
+ − 254
implicit def stringOps(s: String) = new {
+ − 255
def | (r: Rexp) = ALTS(s, r)
+ − 256
def | (r: String) = ALTS(s, r)
+ − 257
def % = STAR(s)
+ − 258
def ~ (r: Rexp) = SEQ(s, r)
+ − 259
def ~ (r: String) = SEQ(s, r)
+ − 260
def $ (r: Rexp) = RECD(s, r)
+ − 261
}
+ − 262
+ − 263
def nullable(r: Rexp) : Boolean = r match {
+ − 264
case ZERO => false
+ − 265
case ONE => true
+ − 266
case CHAR(_) => false
+ − 267
case ANYCHAR => false
+ − 268
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 269
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 270
case STAR(_) => true
+ − 271
case RECD(_, r1) => nullable(r1)
+ − 272
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 273
case OPTIONAL(r) => true
+ − 274
case NOT(r) => !nullable(r)
+ − 275
}
+ − 276
+ − 277
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 278
case ZERO => ZERO
+ − 279
case ONE => ZERO
+ − 280
case CHAR(d) => if (c == d) ONE else ZERO
+ − 281
case ANYCHAR => ONE
+ − 282
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 283
case SEQ(r1, r2) =>
+ − 284
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 285
else SEQ(der(c, r1), r2)
+ − 286
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 287
case RECD(_, r1) => der(c, r1)
+ − 288
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 289
case OPTIONAL(r) => der(c, r)
+ − 290
case NOT(r) => NOT(der(c, r))
+ − 291
}
+ − 292
+ − 293
+ − 294
// extracts a string from a value
+ − 295
def flatten(v: Val) : String = v match {
+ − 296
case Empty => ""
+ − 297
case Chr(c) => c.toString
+ − 298
case Left(v) => flatten(v)
+ − 299
case Right(v) => flatten(v)
+ − 300
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 301
case Stars(vs) => vs.map(flatten).mkString
+ − 302
case Ntime(vs) => vs.map(flatten).mkString
+ − 303
case Optionall(v) => flatten(v)
+ − 304
case Rec(_, v) => flatten(v)
+ − 305
}
+ − 306
+ − 307
+ − 308
// extracts an environment from a value;
+ − 309
// used for tokenising a string
+ − 310
def env(v: Val) : List[(String, String)] = v match {
+ − 311
case Empty => Nil
+ − 312
case Chr(c) => Nil
+ − 313
case Left(v) => env(v)
+ − 314
case Right(v) => env(v)
+ − 315
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 316
case Stars(vs) => vs.flatMap(env)
+ − 317
case Ntime(vs) => vs.flatMap(env)
+ − 318
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 319
case Optionall(v) => env(v)
+ − 320
case Nots(s) => ("Negative", s) :: Nil
+ − 321
}
+ − 322
+ − 323
+ − 324
// The injection and mkeps part of the lexer
+ − 325
//===========================================
+ − 326
+ − 327
def mkeps(r: Rexp) : Val = r match {
+ − 328
case ONE => Empty
+ − 329
case ALTS(r1, r2) =>
+ − 330
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 331
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 332
case STAR(r) => Stars(Nil)
+ − 333
case RECD(x, r) => Rec(x, mkeps(r))
+ − 334
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 335
case OPTIONAL(r) => Optionall(Empty)
+ − 336
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 337
else Nots("")//Nots(s.reverse.toString)
+ − 338
// case NOT(ZERO) => Empty
+ − 339
// case NOT(CHAR(c)) => Empty
+ − 340
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 341
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 342
// case NOT(STAR(r)) => Stars(Nil)
+ − 343
+ − 344
}
+ − 345
+ − 346
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 347
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 348
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 349
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 350
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 351
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 352
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 353
case (CHAR(d), Empty) => Chr(c)
+ − 354
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 355
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 356
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 357
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 358
case (ANYCHAR, Empty) => Chr(c)
+ − 359
}
+ − 360
+ − 361
// some "rectification" functions for simplification
+ − 362
+ − 363
+ − 364
+ − 365
+ − 366
// The Lexing Rules for the WHILE Language
+ − 367
+ − 368
// bnullable function: tests whether the aregular
+ − 369
// expression can recognise the empty string
+ − 370
def bnullable (r: ARexp) : Boolean = r match {
+ − 371
case AZERO => false
+ − 372
case AONE(_) => true
+ − 373
case ACHAR(_,_) => false
+ − 374
case AALTS(_, rs) => rs.exists(bnullable)
+ − 375
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 376
case ASTAR(_, _) => true
+ − 377
case ANOT(_, rn) => !bnullable(rn)
+ − 378
}
+ − 379
+ − 380
def mkepsBC(r: ARexp) : Bits = r match {
+ − 381
case AONE(bs) => bs
+ − 382
case AALTS(bs, rs) => {
+ − 383
val n = rs.indexWhere(bnullable)
+ − 384
bs ++ mkepsBC(rs(n))
+ − 385
}
+ − 386
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 387
case ASTAR(bs, r) => bs ++ List(Z)
+ − 388
case ANOT(bs, rn) => bs
+ − 389
}
+ − 390
+ − 391
+ − 392
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 393
case AZERO => AZERO
+ − 394
case AONE(_) => AZERO
+ − 395
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 396
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 397
case ASEQ(bs, r1, r2) =>
+ − 398
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 399
else ASEQ(bs, bder(c, r1), r2)
+ − 400
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 401
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 402
case AANYCHAR(bs) => AONE(bs)
+ − 403
}
+ − 404
+ − 405
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 406
case AZERO => AZERO
+ − 407
case AONE(cs) => AONE(bs ++ cs)
+ − 408
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 409
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 410
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 411
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 412
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 413
}
+ − 414
+ − 415
+ − 416
def internalise(r: Rexp) : ARexp = r match {
+ − 417
case ZERO => AZERO
+ − 418
case ONE => AONE(Nil)
+ − 419
case CHAR(c) => ACHAR(Nil, c)
+ − 420
//case PRED(f) => APRED(Nil, f)
+ − 421
case ALTS(r1, r2) =>
+ − 422
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 423
// case ALTS(r1::rs) => {
+ − 424
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 425
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 426
// }
+ − 427
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 428
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 429
case RECD(x, r) => internalise(r)
+ − 430
case NOT(r) => ANOT(Nil, internalise(r))
+ − 431
case ANYCHAR => AANYCHAR(Nil)
+ − 432
}
+ − 433
+ − 434
+ − 435
def bsimp(r: ARexp): ARexp =
+ − 436
{
+ − 437
r match {
+ − 438
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 439
case (AZERO, _) => AZERO
+ − 440
case (_, AZERO) => AZERO
+ − 441
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 442
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 443
}
+ − 444
case AALTS(bs1, rs) => {
+ − 445
val rs_simp = rs.map(bsimp(_))
+ − 446
val flat_res = flats(rs_simp)
+ − 447
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 448
dist_res match {
+ − 449
case Nil => AZERO
+ − 450
case s :: Nil => fuse(bs1, s)
+ − 451
case rs => AALTS(bs1, rs)
+ − 452
}
+ − 453
+ − 454
}
+ − 455
case r => r
+ − 456
}
492
+ − 457
}
+ − 458
def strongBsimp(r: ARexp): ARexp =
+ − 459
{
+ − 460
r match {
+ − 461
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 462
case (AZERO, _) => AZERO
+ − 463
case (_, AZERO) => AZERO
+ − 464
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 465
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
431
+ − 466
}
492
+ − 467
case AALTS(bs1, rs) => {
+ − 468
val rs_simp = rs.map(strongBsimp(_))
+ − 469
val flat_res = flats(rs_simp)
+ − 470
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 471
dist_res match {
+ − 472
case Nil => AZERO
+ − 473
case s :: Nil => fuse(bs1, s)
+ − 474
case rs => AALTS(bs1, rs)
+ − 475
}
+ − 476
+ − 477
}
+ − 478
case r => r
431
+ − 479
}
492
+ − 480
}
431
+ − 481
494
+ − 482
def strongBsimp5(r: ARexp): ARexp =
+ − 483
{
+ − 484
// println("was this called?")
+ − 485
r match {
+ − 486
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
+ − 487
case (AZERO, _) => AZERO
+ − 488
case (_, AZERO) => AZERO
+ − 489
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 490
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 491
}
+ − 492
case AALTS(bs1, rs) => {
+ − 493
// println("alts case")
+ − 494
val rs_simp = rs.map(strongBsimp5(_))
+ − 495
val flat_res = flats(rs_simp)
500
+ − 496
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
+ − 497
var dist2_res = distinctBy5(dist_res)
+ − 498
while(dist_res != dist2_res){
+ − 499
dist_res = dist2_res
+ − 500
dist2_res = distinctBy5(dist_res)
+ − 501
}
+ − 502
(dist2_res) match {
494
+ − 503
case Nil => AZERO
+ − 504
case s :: Nil => fuse(bs1, s)
+ − 505
case rs => AALTS(bs1, rs)
+ − 506
}
+ − 507
}
+ − 508
case r => r
+ − 509
}
+ − 510
}
+ − 511
492
+ − 512
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 513
case Nil => r
+ − 514
case c::s => bders(s, bder(c, r))
+ − 515
}
431
+ − 516
492
+ − 517
def flats(rs: List[ARexp]): List[ARexp] = rs match {
431
+ − 518
case Nil => Nil
492
+ − 519
case AZERO :: rs1 => flats(rs1)
+ − 520
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 521
case r1 :: rs2 => r1 :: flats(rs2)
431
+ − 522
}
+ − 523
492
+ − 524
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 525
case Nil => Nil
+ − 526
case (x::xs) => {
+ − 527
val res = f(x)
+ − 528
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 529
else x::distinctBy(xs, f, res::acc)
431
+ − 530
}
492
+ − 531
}
431
+ − 532
+ − 533
492
+ − 534
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 535
r match {
+ − 536
case ASEQ(bs, r1, r2) =>
+ − 537
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 538
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 539
})
+ − 540
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 541
pruned match {
+ − 542
case AZERO => AZERO
+ − 543
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 544
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 545
}
+ − 546
+ − 547
case AALTS(bs, rs) =>
+ − 548
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 549
val rsp = rs.map(r =>
+ − 550
pruneRexp(r, allowableTerms)
+ − 551
)
+ − 552
.filter(r => r != AZERO)
+ − 553
rsp match {
+ − 554
case Nil => AZERO
+ − 555
case r1::Nil => fuse(bs, r1)
+ − 556
case rs1 => AALTS(bs, rs1)
+ − 557
}
+ − 558
case r =>
+ − 559
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 560
}
+ − 561
}
+ − 562
+ − 563
def oneSimp(r: Rexp) : Rexp = r match {
+ − 564
case SEQ(ONE, r) => r
+ − 565
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 566
case r => r//assert r != 0
432
+ − 567
492
+ − 568
}
431
+ − 569
+ − 570
492
+ − 571
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 572
case Nil => Nil
+ − 573
case x :: xs =>
+ − 574
//assert(acc.distinct == acc)
+ − 575
val erased = erase(x)
+ − 576
if(acc.contains(erased))
+ − 577
distinctBy4(xs, acc)
+ − 578
else{
+ − 579
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 580
//val xp = pruneRexp(x, addToAcc)
+ − 581
pruneRexp(x, addToAcc) match {
+ − 582
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 583
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 584
}
431
+ − 585
}
492
+ − 586
}
+ − 587
494
+ − 588
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 589
// where
+ − 590
// "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
+ − 591
// case r of (ASEQ bs r1 r2) ⇒
+ − 592
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 593
// (AALTs bs rs) ⇒
+ − 594
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 595
// r ⇒ r
+ − 596
+ − 597
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 598
// case r::Nil => SEQ(r, acc)
+ − 599
// case Nil => acc
+ − 600
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 601
// }
+ − 602
+ − 603
+ − 604
//the "fake" Language interpretation: just concatenates!
+ − 605
def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 606
case Nil => acc
+ − 607
case r :: rs1 =>
500
+ − 608
// if(acc == ONE)
+ − 609
// L(r, rs1)
+ − 610
// else
494
+ − 611
L(SEQ(acc, r), rs1)
+ − 612
}
+ − 613
500
+ − 614
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
+ − 615
def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
+ − 616
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 617
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
+ − 618
+ − 619
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 620
// println("pakc")
+ − 621
// println(shortRexpOutput(erase(r)))
+ − 622
// println("acc")
+ − 623
// rsprint(acc)
+ − 624
// println("ctx---------")
+ − 625
// rsprint(ctx)
+ − 626
// println("ctx---------end")
+ − 627
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 628
+ − 629
if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
494
+ − 630
AZERO
+ − 631
}
+ − 632
else{
+ − 633
r match {
+ − 634
case ASEQ(bs, r1, r2) =>
500
+ − 635
(pAKC(acc, r1, erase(r2) :: ctx)) match{
494
+ − 636
case AZERO =>
+ − 637
AZERO
+ − 638
case AONE(bs1) =>
+ − 639
fuse(bs1, r2)
+ − 640
case r1p =>
+ − 641
ASEQ(bs, r1p, r2)
+ − 642
}
+ − 643
case AALTS(bs, rs0) =>
+ − 644
// println("before pruning")
+ − 645
// println(s"ctx is ")
+ − 646
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 647
// println(s"rs0 is ")
+ − 648
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
500
+ − 649
// println(s"acc is ")
+ − 650
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 651
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
494
+ − 652
case Nil =>
+ − 653
// println("after pruning Nil")
+ − 654
AZERO
+ − 655
case r :: Nil =>
+ − 656
// println("after pruning singleton")
+ − 657
// println(shortRexpOutput(erase(r)))
+ − 658
r
+ − 659
case rs0p =>
+ − 660
// println("after pruning non-singleton")
+ − 661
AALTS(bs, rs0p)
+ − 662
}
+ − 663
case r => r
+ − 664
}
+ − 665
}
+ − 666
}
+ − 667
+ − 668
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 669
case Nil =>
+ − 670
Nil
+ − 671
case x :: xs => {
+ − 672
val erased = erase(x)
+ − 673
if(acc.contains(erased)){
+ − 674
distinctBy5(xs, acc)
+ − 675
}
+ − 676
else{
+ − 677
val pruned = pAKC(acc, x, Nil)
+ − 678
val newTerms = breakIntoTerms(erase(pruned))
+ − 679
pruned match {
+ − 680
case AZERO =>
+ − 681
distinctBy5(xs, acc)
+ − 682
case xPrime =>
+ − 683
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 684
}
+ − 685
}
+ − 686
}
+ − 687
}
+ − 688
431
+ − 689
492
+ − 690
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 691
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 692
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
494
+ − 693
case ZERO => Nil
492
+ − 694
case _ => r::Nil
+ − 695
}
431
+ − 696
+ − 697
+ − 698
492
+ − 699
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 700
case (ONE, bs) => (Empty, bs)
+ − 701
case (CHAR(f), bs) => (Chr(f), bs)
+ − 702
case (ALTS(r1, r2), Z::bs1) => {
+ − 703
val (v, bs2) = decode_aux(r1, bs1)
+ − 704
(Left(v), bs2)
+ − 705
}
+ − 706
case (ALTS(r1, r2), S::bs1) => {
+ − 707
val (v, bs2) = decode_aux(r2, bs1)
+ − 708
(Right(v), bs2)
+ − 709
}
+ − 710
case (SEQ(r1, r2), bs) => {
+ − 711
val (v1, bs1) = decode_aux(r1, bs)
+ − 712
val (v2, bs2) = decode_aux(r2, bs1)
+ − 713
(Sequ(v1, v2), bs2)
+ − 714
}
+ − 715
case (STAR(r1), S::bs) => {
+ − 716
val (v, bs1) = decode_aux(r1, bs)
494
+ − 717
//(v)
492
+ − 718
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 719
(Stars(v::vs), bs2)
+ − 720
}
+ − 721
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 722
case (RECD(x, r1), bs) => {
+ − 723
val (v, bs1) = decode_aux(r1, bs)
+ − 724
(Rec(x, v), bs1)
+ − 725
}
+ − 726
case (NOT(r), bs) => (Nots(r.toString), bs)
431
+ − 727
}
+ − 728
492
+ − 729
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 730
case (v, Nil) => v
+ − 731
case _ => throw new Exception("Not decodable")
+ − 732
}
+ − 733
+ − 734
+ − 735
431
+ − 736
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 737
val bit_code = blex_simp(internalise(r), s.toList)
+ − 738
decode(r, bit_code)
492
+ − 739
}
+ − 740
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 741
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 742
}
431
+ − 743
492
+ − 744
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 745
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 746
}
+ − 747
494
+ − 748
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 749
decode(r, strong_blex_simp5(internalise(r), s.toList))
+ − 750
}
+ − 751
+ − 752
492
+ − 753
def strongBlexer(r: Rexp, s: String) : Val = {
+ − 754
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 755
}
431
+ − 756
494
+ − 757
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 758
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 759
}
+ − 760
492
+ − 761
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 762
case Nil => {
+ − 763
if (bnullable(r)) {
+ − 764
//println(asize(r))
+ − 765
val bits = mkepsBC(r)
+ − 766
bits
431
+ − 767
}
492
+ − 768
else
+ − 769
throw new Exception("Not matched")
431
+ − 770
}
492
+ − 771
case c::cs => {
+ − 772
val der_res = bder(c,r)
+ − 773
val simp_res = strongBsimp(der_res)
+ − 774
strong_blex_simp(simp_res, cs)
+ − 775
}
+ − 776
}
431
+ − 777
494
+ − 778
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 779
case Nil => {
+ − 780
if (bnullable(r)) {
+ − 781
//println(asize(r))
+ − 782
val bits = mkepsBC(r)
+ − 783
bits
+ − 784
}
+ − 785
else
+ − 786
throw new Exception("Not matched")
+ − 787
}
+ − 788
case c::cs => {
+ − 789
val der_res = bder(c,r)
+ − 790
val simp_res = strongBsimp5(der_res)
+ − 791
strong_blex_simp5(simp_res, cs)
+ − 792
}
+ − 793
}
431
+ − 794
+ − 795
+ − 796
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 797
case Nil => r
435
+ − 798
case c::s =>
494
+ − 799
//println(erase(r))
435
+ − 800
bders_simp(s, bsimp(bder(c, r)))
431
+ − 801
}
+ − 802
494
+ − 803
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 804
case Nil => r
+ − 805
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 806
}
+ − 807
431
+ − 808
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 809
492
+ − 810
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
431
+ − 811
case Nil => r
492
+ − 812
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
431
+ − 813
}
+ − 814
492
+ − 815
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
431
+ − 816
+ − 817
def erase(r:ARexp): Rexp = r match{
+ − 818
case AZERO => ZERO
+ − 819
case AONE(_) => ONE
+ − 820
case ACHAR(bs, c) => CHAR(c)
+ − 821
case AALTS(bs, Nil) => ZERO
+ − 822
case AALTS(bs, a::Nil) => erase(a)
+ − 823
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 824
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 825
case ASTAR(cs, r)=> STAR(erase(r))
+ − 826
case ANOT(bs, r) => NOT(erase(r))
+ − 827
case AANYCHAR(bs) => ANYCHAR
+ − 828
}
+ − 829
+ − 830
492
+ − 831
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 832
case CHAR(c) => true
+ − 833
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 834
case _ => false
+ − 835
}
+ − 836
+ − 837
def flattenSeq(r: Rexp) : String = r match {
+ − 838
case CHAR(c) => c.toString
+ − 839
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 840
case _ => throw new Error("flatten unflattenable rexp")
+ − 841
}
431
+ − 842
492
+ − 843
def shortRexpOutput(r: Rexp) : String = r match {
+ − 844
case CHAR(c) => c.toString
+ − 845
case ONE => "1"
+ − 846
case ZERO => "0"
+ − 847
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 848
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 849
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 850
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 851
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 852
//case RTOP => "RTOP"
+ − 853
}
+ − 854
431
+ − 855
492
+ − 856
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 857
case Nil => {
+ − 858
if (bnullable(r)) {
+ − 859
val bits = mkepsBC(r)
+ − 860
bits
+ − 861
}
+ − 862
else
+ − 863
throw new Exception("Not matched")
+ − 864
}
+ − 865
case c::cs => {
+ − 866
val der_res = bder(c,r)
+ − 867
val simp_res = bsimp(der_res)
+ − 868
blex_simp(simp_res, cs)
+ − 869
}
431
+ − 870
}
+ − 871
+ − 872
492
+ − 873
+ − 874
+ − 875
def size(r: Rexp) : Int = r match {
+ − 876
case ZERO => 1
+ − 877
case ONE => 1
+ − 878
case CHAR(_) => 1
+ − 879
case ANYCHAR => 1
+ − 880
case NOT(r0) => 1 + size(r0)
+ − 881
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 882
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 883
case STAR(r) => 1 + size(r)
431
+ − 884
}
+ − 885
492
+ − 886
def asize(a: ARexp) = size(erase(a))
431
+ − 887
+ − 888
//pder related
+ − 889
type Mon = (Char, Rexp)
+ − 890
type Lin = Set[Mon]
+ − 891
+ − 892
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 893
case ZERO => Set()
+ − 894
case ONE => rs
+ − 895
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 896
}
+ − 897
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 898
case ZERO => Set()
+ − 899
case ONE => l
+ − 900
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 901
}
+ − 902
def lf(r: Rexp): Lin = r match {
+ − 903
case ZERO => Set()
+ − 904
case ONE => Set()
+ − 905
case CHAR(f) => {
+ − 906
//val Some(c) = alphabet.find(f)
+ − 907
Set((f, ONE))
+ − 908
}
+ − 909
case ALTS(r1, r2) => {
+ − 910
lf(r1 ) ++ lf(r2)
+ − 911
}
+ − 912
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 913
case SEQ(r1, r2) =>{
+ − 914
if (nullable(r1))
+ − 915
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 916
else
+ − 917
cir_prod(lf(r1), r2)
+ − 918
}
+ − 919
}
+ − 920
def lfs(r: Set[Rexp]): Lin = {
+ − 921
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 922
}
+ − 923
+ − 924
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 925
val lft = lf(t)
+ − 926
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 927
}
+ − 928
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 929
case x::xs => pders(xs, pder(x, t))
+ − 930
case Nil => Set(t)
+ − 931
}
+ − 932
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 933
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 934
case Nil => ts
+ − 935
}
+ − 936
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 937
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 938
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 939
//because they don't include the original regular term before they are pdered.
+ − 940
//now only pderas is fixed.
+ − 941
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 942
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 943
def awidth(r: Rexp) : Int = r match {
+ − 944
case CHAR(c) => 1
+ − 945
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 946
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 947
case ONE => 0
+ − 948
case ZERO => 0
+ − 949
case STAR(r) => awidth(r)
+ − 950
}
+ − 951
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 952
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 953
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 954
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 955
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 956
case ZERO => Set[Rexp]()
+ − 957
case ONE => Set[Rexp]()
+ − 958
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 959
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 960
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 961
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 962
}
+ − 963
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 964
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 965
case Nil => ts
+ − 966
}
+ − 967
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 968
+ − 969
+ − 970
+ − 971
def starPrint(r: Rexp) : Unit = r match {
+ − 972
+ − 973
case SEQ(head, rstar) =>
+ − 974
println(shortRexpOutput(head) ++ "~STARREG")
+ − 975
case STAR(rstar) =>
+ − 976
println("STARREG")
+ − 977
case ALTS(r1, r2) =>
+ − 978
println("(")
+ − 979
starPrint(r1)
+ − 980
println("+")
+ − 981
starPrint(r2)
+ − 982
println(")")
+ − 983
case ZERO => println("0")
+ − 984
+ − 985
}
+ − 986
+ − 987
// @arg(doc = "small tests")
+ − 988
val STARREG = //((( (STAR("aa")) | STAR("aaa") ).%).%)
+ − 989
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 990
+ − 991
// @main
492
+ − 992
+ − 993
+ − 994
431
+ − 995
def small() = {
+ − 996
+ − 997
+ − 998
// println(lexing_simp(NOTREG, prog0))
+ − 999
// val v = lex_simp(NOTREG, prog0.toList)
+ − 1000
// println(v)
+ − 1001
+ − 1002
// val d = (lex_simp(NOTREG, prog0.toList))
+ − 1003
// println(d)
+ − 1004
val pderSTAR = pderUNIV(STARREG)
+ − 1005
+ − 1006
val refSize = pderSTAR.map(size(_)).sum
435
+ − 1007
// println("different partial derivative terms:")
+ − 1008
// pderSTAR.foreach(r => r match {
431
+ − 1009
435
+ − 1010
// case SEQ(head, rstar) =>
+ − 1011
// println(shortRexpOutput(head) ++ "~STARREG")
+ − 1012
// case STAR(rstar) =>
+ − 1013
// println("STARREG")
431
+ − 1014
435
+ − 1015
// }
+ − 1016
// )
+ − 1017
// println("the total number of terms is")
+ − 1018
// //println(refSize)
+ − 1019
// println(pderSTAR.size)
431
+ − 1020
492
+ − 1021
// val A : Rexp= ("c" | (ONE | "b") ~ "d") ~((ONE).%)
+ − 1022
// val B : Rexp = ((ONE).%)
+ − 1023
// val C : Rexp = ("d") ~ ((ONE).%)
+ − 1024
// val PRUNE_REG : Rexp = (C | B | A)
+ − 1025
// val APRUNE_REG = internalise(PRUNE_REG)
+ − 1026
// val program_solution = pruneRexp(APRUNE_REG, breakIntoTerms(PRUNE_REG))
+ − 1027
// println("program executes and gives: as disired!")
+ − 1028
// println(shortRexpOutput(erase(program_solution)))
435
+ − 1029
// val simpedPruneReg = strongBsimp(APRUNE_REG)
+ − 1030
+ − 1031
// println(shortRexpOutput(erase(simpedPruneReg)))
492
+ − 1032
+ − 1033
+ − 1034
for(i <- List(1,2,3,4,10, 100, 400, 841, 900 ) ){// 100, 400, 800, 840, 841, 900
+ − 1035
val prog0 = "a" * i
+ − 1036
//println(s"test: $prog0")
+ − 1037
println(s"testing with $i a's" )
+ − 1038
//val bd = bdersSimp(prog0, STARREG)//DB
+ − 1039
val sbd = bdersStrongRexp(prog0, STARREG)//strongDB
+ − 1040
starPrint(erase(sbd))
+ − 1041
val subTerms = breakIntoTerms(erase(sbd))
+ − 1042
//val subTermsLarge = breakIntoTerms(erase(bd))
431
+ − 1043
492
+ − 1044
println(s"subterms of regex with strongDB: ${subTerms.length}")//, standard DB: ${subTermsLarge.length}")
431
+ − 1045
+ − 1046
+ − 1047
492
+ − 1048
println("the number of distinct subterms for bsimp with strongDB")
+ − 1049
println(subTerms.distinct.size)
+ − 1050
//println(subTermsLarge.distinct.size)
+ − 1051
if(pderSTAR.size > subTerms.length)
+ − 1052
println(s"which is less than or equal to the number of PDER terms: ${pderSTAR.size}")
431
+ − 1053
+ − 1054
492
+ − 1055
// println(shortRexpOutput(erase(sbd)))
+ − 1056
// println(shortRexpOutput(erase(bd)))
431
+ − 1057
492
+ − 1058
println("pdersize, original, strongSimp")
+ − 1059
println(refSize, size(STARREG), asize(sbd))
431
+ − 1060
492
+ − 1061
//val vres = strong_blexing_simp( STARREG, prog0)
+ − 1062
//println(vres)
+ − 1063
}
+ − 1064
// println(vs.length)
+ − 1065
// println(vs)
431
+ − 1066
+ − 1067
+ − 1068
// val prog1 = """read n; write n"""
+ − 1069
// println(s"test: $prog1")
+ − 1070
// println(lexing_simp(WHILE_REGS, prog1))
492
+ − 1071
// val display = ("a"| "ab").%
+ − 1072
// val adisplay = internalise(display)
+ − 1073
// bders_simp( "aaaaa".toList, adisplay)
431
+ − 1074
}
+ − 1075
492
+ − 1076
def generator_test() {
+ − 1077
494
+ − 1078
// test(rexp(7), 1000) { (r: Rexp) =>
+ − 1079
// val ss = stringsFromRexp(r)
+ − 1080
// val boolList = ss.map(s => {
+ − 1081
// val simpVal = simpBlexer(r, s)
+ − 1082
// val strongVal = strongBlexer(r, s)
+ − 1083
// // println(simpVal)
+ − 1084
// // println(strongVal)
+ − 1085
// (simpVal == strongVal) && (simpVal != None)
+ − 1086
// })
+ − 1087
// !boolList.exists(b => b == false)
+ − 1088
// }
+ − 1089
// test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) =>
+ − 1090
// val ss = stringsFromRexp(r)
+ − 1091
// val boolList = ss.map(s => {
+ − 1092
// val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1093
// val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1094
// // println(shortRexpOutput(r))
+ − 1095
// // println(s)
+ − 1096
// // println(strongVal)
+ − 1097
// // println(strongVal5)
+ − 1098
// // (strongVal == strongVal5)
493
+ − 1099
494
+ − 1100
// if(asize(bdStrong5) > asize(bdStrong)){
+ − 1101
// println(s)
+ − 1102
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1103
// println(shortRexpOutput(erase(bdStrong)))
+ − 1104
// }
+ − 1105
// asize(bdStrong5) <= asize(bdStrong)
+ − 1106
// })
+ − 1107
// !boolList.exists(b => b == false)
+ − 1108
// }
500
+ − 1109
//*** example where bdStrong5 has a smaller size than bdStrong
+ − 1110
// test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) =>
+ − 1111
//*** depth 5 example bdStrong5 larger size than bdStrong
+ − 1112
// test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
+ − 1113
+ − 1114
+ − 1115
+ − 1116
//sanity check from Christian's request
+ − 1117
// val r = ("a" | "ab") ~ ("bc" | "c")
+ − 1118
// val a = internalise(r)
+ − 1119
// val aval = blexing_simp(r, "abc")
+ − 1120
// println(aval)
+ − 1121
+ − 1122
//sample counterexample:(depth 7)
+ − 1123
//STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
+ − 1124
//(depth5)
+ − 1125
//STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
+ − 1126
test(rexp(4), 10000000) { (r: Rexp) =>
+ − 1127
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
493
+ − 1128
val ss = stringsFromRexp(r)
+ − 1129
val boolList = ss.map(s => {
494
+ − 1130
val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1131
val bdStrong5 = bdersStrong5(s.toList, internalise(r))
500
+ − 1132
val bdFurther5 = strongBsimp5(bdStrong5)
+ − 1133
val sizeFurther5 = asize(bdFurther5)
+ − 1134
val pdersSet = pderUNIV(r)
494
+ − 1135
val size5 = asize(bdStrong5)
+ − 1136
val size0 = asize(bdStrong)
+ − 1137
// println(s)
500
+ − 1138
// println("pdersSet size")
+ − 1139
// println(pdersSet.size)
+ − 1140
// pdersSet.foreach(r => println(shortRexpOutput(r)))
+ − 1141
// println("after bdStrong5")
+ − 1142
+ − 1143
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1144
// println(breakIntoTerms(erase(bdStrong5)).size)
+ − 1145
+ − 1146
// println("after bdStrong")
+ − 1147
// println(shortRexpOutput(erase(bdStrong)))
+ − 1148
// println(breakIntoTerms(erase(bdStrong)).size)
+ − 1149
// println(size5, size0, sizeFurther5)
+ − 1150
// aprint(strongBsimp5(bdStrong))
+ − 1151
// println(asize(strongBsimp5(bdStrong5)))
+ − 1152
// println(s)
+ − 1153
size5 <= size0
493
+ − 1154
})
494
+ − 1155
// println(boolList)
500
+ − 1156
//!boolList.exists(b => b == false)
493
+ − 1157
!boolList.exists(b => b == false)
492
+ − 1158
}
493
+ − 1159
+ − 1160
492
+ − 1161
}
+ − 1162
// small()
+ − 1163
generator_test()
493
+ − 1164
+ − 1165
1
+ − 1166
+ − 1167
SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1168
SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1169
STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1170
SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
+ − 1171
+ − 1172
+ − 1173
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1174
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))