431
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
492
+ − 13
import scala.util.Try
431
+ − 14
+ − 15
// regular expressions including records
+ − 16
abstract class Rexp
+ − 17
case object ZERO extends Rexp
+ − 18
case object ONE extends Rexp
+ − 19
case object ANYCHAR extends Rexp
+ − 20
case class CHAR(c: Char) extends Rexp
+ − 21
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 23
case class STAR(r: Rexp) extends Rexp
+ − 24
case class RECD(x: String, r: Rexp) extends Rexp
+ − 25
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 26
case class OPTIONAL(r: Rexp) extends Rexp
+ − 27
case class NOT(r: Rexp) extends Rexp
+ − 28
// records for extracting strings or tokens
+ − 29
+ − 30
// values
+ − 31
abstract class Val
492
+ − 32
case object Failure extends Val
431
+ − 33
case object Empty extends Val
+ − 34
case class Chr(c: Char) extends Val
+ − 35
case class Sequ(v1: Val, v2: Val) extends Val
+ − 36
case class Left(v: Val) extends Val
+ − 37
case class Right(v: Val) extends Val
+ − 38
case class Stars(vs: List[Val]) extends Val
+ − 39
case class Rec(x: String, v: Val) extends Val
+ − 40
case class Ntime(vs: List[Val]) extends Val
+ − 41
case class Optionall(v: Val) extends Val
+ − 42
case class Nots(s: String) extends Val
+ − 43
+ − 44
+ − 45
+ − 46
abstract class Bit
+ − 47
case object Z extends Bit
+ − 48
case object S extends Bit
+ − 49
+ − 50
+ − 51
type Bits = List[Bit]
+ − 52
+ − 53
abstract class ARexp
+ − 54
case object AZERO extends ARexp
+ − 55
case class AONE(bs: Bits) extends ARexp
+ − 56
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 57
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 58
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 59
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 60
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 61
case class AANYCHAR(bs: Bits) extends ARexp
+ − 62
492
+ − 63
trait Generator[+T] {
+ − 64
self => // an alias for "this"
+ − 65
def generate(): T
+ − 66
+ − 67
def gen(n: Int) : List[T] =
+ − 68
if (n == 0) Nil else self.generate() :: gen(n - 1)
+ − 69
+ − 70
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 71
def generate = f(self.generate())
+ − 72
}
+ − 73
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 74
def generate = f(self.generate()).generate()
+ − 75
}
+ − 76
+ − 77
+ − 78
}
+ − 79
+ − 80
// tests a property according to a given random generator
+ − 81
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 82
for (_ <- 0 until amount) {
+ − 83
val value = r.generate()
+ − 84
assert(pred(value), s"Test failed for: $value")
+ − 85
}
+ − 86
println(s"Test passed $amount times")
+ − 87
}
+ − 88
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 89
for (_ <- 0 until amount) {
+ − 90
val valueR = r.generate()
+ − 91
val valueS = s.generate()
+ − 92
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
+ − 93
}
+ − 94
println(s"Test passed $amount times")
+ − 95
}
+ − 96
+ − 97
// random integers
+ − 98
val integers = new Generator[Int] {
+ − 99
val rand = new java.util.Random
+ − 100
def generate() = rand.nextInt()
+ − 101
}
+ − 102
+ − 103
// random booleans
+ − 104
val booleans = integers.map(_ > 0)
+ − 105
+ − 106
// random integers in the range lo and high
+ − 107
def range(lo: Int, hi: Int): Generator[Int] =
+ − 108
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 109
+ − 110
// random characters
+ − 111
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 112
val chars = chars_range('a', 'z')
+ − 113
+ − 114
+ − 115
def oneOf[T](xs: T*): Generator[T] =
+ − 116
for (idx <- range(0, xs.length)) yield xs(idx)
+ − 117
+ − 118
def single[T](x: T) = new Generator[T] {
+ − 119
def generate() = x
+ − 120
}
+ − 121
+ − 122
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 123
for (x <- t; y <- u) yield (x, y)
+ − 124
+ − 125
// lists
+ − 126
def emptyLists = single(Nil)
+ − 127
+ − 128
def nonEmptyLists : Generator[List[Int]] =
+ − 129
for (head <- integers; tail <- lists) yield head :: tail
+ − 130
+ − 131
def lists: Generator[List[Int]] = for {
+ − 132
kind <- booleans
+ − 133
list <- if (kind) emptyLists else nonEmptyLists
+ − 134
} yield list
+ − 135
+ − 136
def char_list(len: Int): Generator[List[Char]] = {
+ − 137
if(len <= 0) single(Nil)
+ − 138
else{
+ − 139
for {
493
+ − 140
c <- chars_range('a', 'd')
492
+ − 141
tail <- char_list(len - 1)
+ − 142
} yield c :: tail
+ − 143
}
+ − 144
}
+ − 145
+ − 146
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 147
493
+ − 148
def sampleString(r: Rexp) : List[String] = r match {
+ − 149
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 150
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 151
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 152
case ONE => "" :: Nil
+ − 153
case ZERO => Nil
+ − 154
case CHAR(c) => c.toString :: Nil
+ − 155
+ − 156
}
+ − 157
+ − 158
def stringsFromRexp(r: Rexp) : List[String] =
+ − 159
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 160
492
+ − 161
+ − 162
// (simple) binary trees
+ − 163
trait Tree[T]
+ − 164
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 165
case class Leaf[T](x: T) extends Tree[T]
+ − 166
+ − 167
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 168
for (x <- t) yield Leaf(x)
+ − 169
+ − 170
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 171
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 172
+ − 173
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 174
for (kind <- range(0, 2);
+ − 175
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 176
+ − 177
// regular expressions
+ − 178
+ − 179
// generates random leaf-regexes; prefers CHAR-regexes
+ − 180
def leaf_rexp() : Generator[Rexp] =
+ − 181
for (kind <- range(0, 5);
+ − 182
c <- chars_range('a', 'd')) yield
+ − 183
kind match {
+ − 184
case 0 => ZERO
+ − 185
case 1 => ONE
+ − 186
case _ => CHAR(c)
+ − 187
}
+ − 188
+ − 189
// generates random inner regexes with maximum depth d
+ − 190
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 191
for (kind <- range(0, 3);
+ − 192
l <- rexp(d);
+ − 193
r <- rexp(d))
+ − 194
yield kind match {
+ − 195
case 0 => ALTS(l, r)
+ − 196
case 1 => SEQ(l, r)
+ − 197
case 2 => STAR(r)
+ − 198
}
+ − 199
+ − 200
// generates random regexes with maximum depth d;
+ − 201
// prefers inner regexes in 2/3 of the cases
+ − 202
def rexp(d: Int = 100): Generator[Rexp] =
+ − 203
for (kind <- range(0, 3);
+ − 204
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 205
+ − 206
+ − 207
// some test functions for rexps
+ − 208
def height(r: Rexp) : Int = r match {
+ − 209
case ZERO => 1
+ − 210
case ONE => 1
+ − 211
case CHAR(_) => 1
+ − 212
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 213
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 214
case STAR(r) => 1 + height(r)
+ − 215
}
+ − 216
+ − 217
// def size(r: Rexp) : Int = r match {
+ − 218
// case ZERO => 1
+ − 219
// case ONE => 1
+ − 220
// case CHAR(_) => 1
+ − 221
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 222
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 223
// case STAR(r) => 1 + size(r)
+ − 224
// }
+ − 225
+ − 226
// randomly subtracts 1 or 2 from the STAR case
+ − 227
def size_faulty(r: Rexp) : Int = r match {
+ − 228
case ZERO => 1
+ − 229
case ONE => 1
+ − 230
case CHAR(_) => 1
+ − 231
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 232
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 233
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 234
}
+ − 235
431
+ − 236
+ − 237
+ − 238
// some convenience for typing in regular expressions
+ − 239
+ − 240
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 241
case Nil => ONE
+ − 242
case c::Nil => CHAR(c)
+ − 243
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 244
}
+ − 245
implicit def string2rexp(s : String) : Rexp =
+ − 246
charlist2rexp(s.toList)
+ − 247
+ − 248
implicit def RexpOps(r: Rexp) = new {
+ − 249
def | (s: Rexp) = ALTS(r, s)
+ − 250
def % = STAR(r)
+ − 251
def ~ (s: Rexp) = SEQ(r, s)
+ − 252
}
+ − 253
+ − 254
implicit def stringOps(s: String) = new {
+ − 255
def | (r: Rexp) = ALTS(s, r)
+ − 256
def | (r: String) = ALTS(s, r)
+ − 257
def % = STAR(s)
+ − 258
def ~ (r: Rexp) = SEQ(s, r)
+ − 259
def ~ (r: String) = SEQ(s, r)
+ − 260
def $ (r: Rexp) = RECD(s, r)
+ − 261
}
+ − 262
+ − 263
def nullable(r: Rexp) : Boolean = r match {
+ − 264
case ZERO => false
+ − 265
case ONE => true
+ − 266
case CHAR(_) => false
+ − 267
case ANYCHAR => false
+ − 268
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 269
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 270
case STAR(_) => true
+ − 271
case RECD(_, r1) => nullable(r1)
+ − 272
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 273
case OPTIONAL(r) => true
+ − 274
case NOT(r) => !nullable(r)
+ − 275
}
+ − 276
+ − 277
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 278
case ZERO => ZERO
+ − 279
case ONE => ZERO
+ − 280
case CHAR(d) => if (c == d) ONE else ZERO
+ − 281
case ANYCHAR => ONE
+ − 282
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 283
case SEQ(r1, r2) =>
+ − 284
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 285
else SEQ(der(c, r1), r2)
+ − 286
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 287
case RECD(_, r1) => der(c, r1)
+ − 288
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 289
case OPTIONAL(r) => der(c, r)
+ − 290
case NOT(r) => NOT(der(c, r))
+ − 291
}
+ − 292
+ − 293
+ − 294
// extracts a string from a value
+ − 295
def flatten(v: Val) : String = v match {
+ − 296
case Empty => ""
+ − 297
case Chr(c) => c.toString
+ − 298
case Left(v) => flatten(v)
+ − 299
case Right(v) => flatten(v)
+ − 300
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 301
case Stars(vs) => vs.map(flatten).mkString
+ − 302
case Ntime(vs) => vs.map(flatten).mkString
+ − 303
case Optionall(v) => flatten(v)
+ − 304
case Rec(_, v) => flatten(v)
+ − 305
}
+ − 306
+ − 307
+ − 308
// extracts an environment from a value;
+ − 309
// used for tokenising a string
+ − 310
def env(v: Val) : List[(String, String)] = v match {
+ − 311
case Empty => Nil
+ − 312
case Chr(c) => Nil
+ − 313
case Left(v) => env(v)
+ − 314
case Right(v) => env(v)
+ − 315
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 316
case Stars(vs) => vs.flatMap(env)
+ − 317
case Ntime(vs) => vs.flatMap(env)
+ − 318
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 319
case Optionall(v) => env(v)
+ − 320
case Nots(s) => ("Negative", s) :: Nil
+ − 321
}
+ − 322
+ − 323
+ − 324
// The injection and mkeps part of the lexer
+ − 325
//===========================================
+ − 326
+ − 327
def mkeps(r: Rexp) : Val = r match {
+ − 328
case ONE => Empty
+ − 329
case ALTS(r1, r2) =>
+ − 330
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 331
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 332
case STAR(r) => Stars(Nil)
+ − 333
case RECD(x, r) => Rec(x, mkeps(r))
+ − 334
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 335
case OPTIONAL(r) => Optionall(Empty)
+ − 336
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 337
else Nots("")//Nots(s.reverse.toString)
+ − 338
// case NOT(ZERO) => Empty
+ − 339
// case NOT(CHAR(c)) => Empty
+ − 340
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 341
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 342
// case NOT(STAR(r)) => Stars(Nil)
+ − 343
+ − 344
}
+ − 345
+ − 346
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 347
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 348
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 349
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 350
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 351
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 352
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 353
case (CHAR(d), Empty) => Chr(c)
+ − 354
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 355
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 356
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 357
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 358
case (ANYCHAR, Empty) => Chr(c)
+ − 359
}
+ − 360
+ − 361
// some "rectification" functions for simplification
+ − 362
+ − 363
+ − 364
+ − 365
+ − 366
// The Lexing Rules for the WHILE Language
+ − 367
+ − 368
// bnullable function: tests whether the aregular
+ − 369
// expression can recognise the empty string
+ − 370
def bnullable (r: ARexp) : Boolean = r match {
+ − 371
case AZERO => false
+ − 372
case AONE(_) => true
+ − 373
case ACHAR(_,_) => false
+ − 374
case AALTS(_, rs) => rs.exists(bnullable)
+ − 375
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 376
case ASTAR(_, _) => true
+ − 377
case ANOT(_, rn) => !bnullable(rn)
+ − 378
}
+ − 379
+ − 380
def mkepsBC(r: ARexp) : Bits = r match {
+ − 381
case AONE(bs) => bs
+ − 382
case AALTS(bs, rs) => {
+ − 383
val n = rs.indexWhere(bnullable)
+ − 384
bs ++ mkepsBC(rs(n))
+ − 385
}
+ − 386
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 387
case ASTAR(bs, r) => bs ++ List(Z)
+ − 388
case ANOT(bs, rn) => bs
+ − 389
}
+ − 390
+ − 391
+ − 392
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 393
case AZERO => AZERO
+ − 394
case AONE(_) => AZERO
+ − 395
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 396
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 397
case ASEQ(bs, r1, r2) =>
+ − 398
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 399
else ASEQ(bs, bder(c, r1), r2)
+ − 400
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 401
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 402
case AANYCHAR(bs) => AONE(bs)
+ − 403
}
+ − 404
+ − 405
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 406
case AZERO => AZERO
+ − 407
case AONE(cs) => AONE(bs ++ cs)
+ − 408
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 409
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 410
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 411
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 412
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 413
}
+ − 414
+ − 415
+ − 416
def internalise(r: Rexp) : ARexp = r match {
+ − 417
case ZERO => AZERO
+ − 418
case ONE => AONE(Nil)
+ − 419
case CHAR(c) => ACHAR(Nil, c)
+ − 420
//case PRED(f) => APRED(Nil, f)
+ − 421
case ALTS(r1, r2) =>
+ − 422
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 423
// case ALTS(r1::rs) => {
+ − 424
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 425
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 426
// }
+ − 427
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 428
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 429
case RECD(x, r) => internalise(r)
+ − 430
case NOT(r) => ANOT(Nil, internalise(r))
+ − 431
case ANYCHAR => AANYCHAR(Nil)
+ − 432
}
+ − 433
+ − 434
+ − 435
def bsimp(r: ARexp): ARexp =
+ − 436
{
+ − 437
r match {
+ − 438
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 439
case (AZERO, _) => AZERO
+ − 440
case (_, AZERO) => AZERO
+ − 441
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 442
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 443
}
+ − 444
case AALTS(bs1, rs) => {
+ − 445
val rs_simp = rs.map(bsimp(_))
+ − 446
val flat_res = flats(rs_simp)
+ − 447
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 448
dist_res match {
+ − 449
case Nil => AZERO
+ − 450
case s :: Nil => fuse(bs1, s)
+ − 451
case rs => AALTS(bs1, rs)
+ − 452
}
+ − 453
+ − 454
}
+ − 455
case r => r
+ − 456
}
492
+ − 457
}
+ − 458
def strongBsimp(r: ARexp): ARexp =
+ − 459
{
+ − 460
r match {
+ − 461
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 462
case (AZERO, _) => AZERO
+ − 463
case (_, AZERO) => AZERO
+ − 464
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 465
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
431
+ − 466
}
492
+ − 467
case AALTS(bs1, rs) => {
+ − 468
val rs_simp = rs.map(strongBsimp(_))
+ − 469
val flat_res = flats(rs_simp)
+ − 470
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 471
dist_res match {
+ − 472
case Nil => AZERO
+ − 473
case s :: Nil => fuse(bs1, s)
+ − 474
case rs => AALTS(bs1, rs)
+ − 475
}
+ − 476
+ − 477
}
+ − 478
case r => r
431
+ − 479
}
492
+ − 480
}
431
+ − 481
494
+ − 482
def strongBsimp5(r: ARexp): ARexp =
+ − 483
{
+ − 484
// println("was this called?")
+ − 485
r match {
+ − 486
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
+ − 487
case (AZERO, _) => AZERO
+ − 488
case (_, AZERO) => AZERO
+ − 489
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 490
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 491
}
+ − 492
case AALTS(bs1, rs) => {
+ − 493
// println("alts case")
+ − 494
val rs_simp = rs.map(strongBsimp5(_))
+ − 495
+ − 496
val flat_res = flats(rs_simp)
+ − 497
+ − 498
val dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
+ − 499
+ − 500
dist_res match {
+ − 501
case Nil => AZERO
+ − 502
case s :: Nil => fuse(bs1, s)
+ − 503
case rs => AALTS(bs1, rs)
+ − 504
}
+ − 505
+ − 506
}
+ − 507
case r => r
+ − 508
}
+ − 509
}
+ − 510
492
+ − 511
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 512
case Nil => r
+ − 513
case c::s => bders(s, bder(c, r))
+ − 514
}
431
+ − 515
492
+ − 516
def flats(rs: List[ARexp]): List[ARexp] = rs match {
431
+ − 517
case Nil => Nil
492
+ − 518
case AZERO :: rs1 => flats(rs1)
+ − 519
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 520
case r1 :: rs2 => r1 :: flats(rs2)
431
+ − 521
}
+ − 522
492
+ − 523
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 524
case Nil => Nil
+ − 525
case (x::xs) => {
+ − 526
val res = f(x)
+ − 527
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 528
else x::distinctBy(xs, f, res::acc)
431
+ − 529
}
492
+ − 530
}
431
+ − 531
+ − 532
492
+ − 533
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 534
r match {
+ − 535
case ASEQ(bs, r1, r2) =>
+ − 536
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 537
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 538
})
+ − 539
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 540
pruned match {
+ − 541
case AZERO => AZERO
+ − 542
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 543
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 544
}
+ − 545
+ − 546
case AALTS(bs, rs) =>
+ − 547
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 548
val rsp = rs.map(r =>
+ − 549
pruneRexp(r, allowableTerms)
+ − 550
)
+ − 551
.filter(r => r != AZERO)
+ − 552
rsp match {
+ − 553
case Nil => AZERO
+ − 554
case r1::Nil => fuse(bs, r1)
+ − 555
case rs1 => AALTS(bs, rs1)
+ − 556
}
+ − 557
case r =>
+ − 558
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 559
}
+ − 560
}
+ − 561
+ − 562
def oneSimp(r: Rexp) : Rexp = r match {
+ − 563
case SEQ(ONE, r) => r
+ − 564
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 565
case r => r//assert r != 0
432
+ − 566
492
+ − 567
}
431
+ − 568
+ − 569
492
+ − 570
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 571
case Nil => Nil
+ − 572
case x :: xs =>
+ − 573
//assert(acc.distinct == acc)
+ − 574
val erased = erase(x)
+ − 575
if(acc.contains(erased))
+ − 576
distinctBy4(xs, acc)
+ − 577
else{
+ − 578
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 579
//val xp = pruneRexp(x, addToAcc)
+ − 580
pruneRexp(x, addToAcc) match {
+ − 581
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 582
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 583
}
431
+ − 584
}
492
+ − 585
}
+ − 586
494
+ − 587
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 588
// where
+ − 589
// "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
+ − 590
// case r of (ASEQ bs r1 r2) ⇒
+ − 591
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 592
// (AALTs bs rs) ⇒
+ − 593
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 594
// r ⇒ r
+ − 595
+ − 596
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 597
// case r::Nil => SEQ(r, acc)
+ − 598
// case Nil => acc
+ − 599
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 600
// }
+ − 601
+ − 602
+ − 603
//the "fake" Language interpretation: just concatenates!
+ − 604
def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 605
case Nil => acc
+ − 606
case r :: rs1 =>
+ − 607
if(acc == ONE)
+ − 608
L(r, rs1)
+ − 609
else
+ − 610
L(SEQ(acc, r), rs1)
+ − 611
}
+ − 612
+ − 613
def pAKC(rs: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 614
if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(rs.contains)) {//rs.flatMap(breakIntoTerms
+ − 615
AZERO
+ − 616
}
+ − 617
else{
+ − 618
r match {
+ − 619
case ASEQ(bs, r1, r2) =>
+ − 620
(pAKC(rs, r1, erase(r2) :: ctx)) match{
+ − 621
case AZERO =>
+ − 622
AZERO
+ − 623
case AONE(bs1) =>
+ − 624
fuse(bs1, r2)
+ − 625
case r1p =>
+ − 626
ASEQ(bs, r1p, r2)
+ − 627
}
+ − 628
case AALTS(bs, rs0) =>
+ − 629
// println("before pruning")
+ − 630
// println(s"ctx is ")
+ − 631
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 632
// println(s"rs0 is ")
+ − 633
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 634
// println(s"rs is ")
+ − 635
// rs.foreach(r => println(shortRexpOutput(r)))
+ − 636
rs0.map(r => pAKC(rs, r, ctx)).filter(_ != AZERO) match {
+ − 637
case Nil =>
+ − 638
// println("after pruning Nil")
+ − 639
AZERO
+ − 640
case r :: Nil =>
+ − 641
// println("after pruning singleton")
+ − 642
// println(shortRexpOutput(erase(r)))
+ − 643
r
+ − 644
case rs0p =>
+ − 645
// println("after pruning non-singleton")
+ − 646
AALTS(bs, rs0p)
+ − 647
}
+ − 648
case r => r
+ − 649
}
+ − 650
}
+ − 651
}
+ − 652
+ − 653
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 654
case Nil =>
+ − 655
Nil
+ − 656
case x :: xs => {
+ − 657
val erased = erase(x)
+ − 658
if(acc.contains(erased)){
+ − 659
distinctBy5(xs, acc)
+ − 660
}
+ − 661
else{
+ − 662
val pruned = pAKC(acc, x, Nil)
+ − 663
val newTerms = breakIntoTerms(erase(pruned))
+ − 664
pruned match {
+ − 665
case AZERO =>
+ − 666
distinctBy5(xs, acc)
+ − 667
case xPrime =>
+ − 668
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 669
}
+ − 670
}
+ − 671
}
+ − 672
}
+ − 673
431
+ − 674
492
+ − 675
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 676
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 677
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
494
+ − 678
case ZERO => Nil
492
+ − 679
case _ => r::Nil
+ − 680
}
431
+ − 681
+ − 682
+ − 683
492
+ − 684
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 685
case (ONE, bs) => (Empty, bs)
+ − 686
case (CHAR(f), bs) => (Chr(f), bs)
+ − 687
case (ALTS(r1, r2), Z::bs1) => {
+ − 688
val (v, bs2) = decode_aux(r1, bs1)
+ − 689
(Left(v), bs2)
+ − 690
}
+ − 691
case (ALTS(r1, r2), S::bs1) => {
+ − 692
val (v, bs2) = decode_aux(r2, bs1)
+ − 693
(Right(v), bs2)
+ − 694
}
+ − 695
case (SEQ(r1, r2), bs) => {
+ − 696
val (v1, bs1) = decode_aux(r1, bs)
+ − 697
val (v2, bs2) = decode_aux(r2, bs1)
+ − 698
(Sequ(v1, v2), bs2)
+ − 699
}
+ − 700
case (STAR(r1), S::bs) => {
+ − 701
val (v, bs1) = decode_aux(r1, bs)
494
+ − 702
//(v)
492
+ − 703
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 704
(Stars(v::vs), bs2)
+ − 705
}
+ − 706
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 707
case (RECD(x, r1), bs) => {
+ − 708
val (v, bs1) = decode_aux(r1, bs)
+ − 709
(Rec(x, v), bs1)
+ − 710
}
+ − 711
case (NOT(r), bs) => (Nots(r.toString), bs)
431
+ − 712
}
+ − 713
492
+ − 714
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 715
case (v, Nil) => v
+ − 716
case _ => throw new Exception("Not decodable")
+ − 717
}
+ − 718
+ − 719
+ − 720
431
+ − 721
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 722
val bit_code = blex_simp(internalise(r), s.toList)
+ − 723
decode(r, bit_code)
492
+ − 724
}
+ − 725
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 726
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 727
}
431
+ − 728
492
+ − 729
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 730
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 731
}
+ − 732
494
+ − 733
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 734
decode(r, strong_blex_simp5(internalise(r), s.toList))
+ − 735
}
+ − 736
+ − 737
492
+ − 738
def strongBlexer(r: Rexp, s: String) : Val = {
+ − 739
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 740
}
431
+ − 741
494
+ − 742
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 743
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 744
}
+ − 745
492
+ − 746
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 747
case Nil => {
+ − 748
if (bnullable(r)) {
+ − 749
//println(asize(r))
+ − 750
val bits = mkepsBC(r)
+ − 751
bits
431
+ − 752
}
492
+ − 753
else
+ − 754
throw new Exception("Not matched")
431
+ − 755
}
492
+ − 756
case c::cs => {
+ − 757
val der_res = bder(c,r)
+ − 758
val simp_res = strongBsimp(der_res)
+ − 759
strong_blex_simp(simp_res, cs)
+ − 760
}
+ − 761
}
431
+ − 762
494
+ − 763
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 764
case Nil => {
+ − 765
if (bnullable(r)) {
+ − 766
//println(asize(r))
+ − 767
val bits = mkepsBC(r)
+ − 768
bits
+ − 769
}
+ − 770
else
+ − 771
throw new Exception("Not matched")
+ − 772
}
+ − 773
case c::cs => {
+ − 774
val der_res = bder(c,r)
+ − 775
val simp_res = strongBsimp5(der_res)
+ − 776
strong_blex_simp5(simp_res, cs)
+ − 777
}
+ − 778
}
431
+ − 779
+ − 780
+ − 781
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 782
case Nil => r
435
+ − 783
case c::s =>
494
+ − 784
//println(erase(r))
435
+ − 785
bders_simp(s, bsimp(bder(c, r)))
431
+ − 786
}
+ − 787
494
+ − 788
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 789
case Nil => r
+ − 790
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 791
}
+ − 792
431
+ − 793
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 794
492
+ − 795
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
431
+ − 796
case Nil => r
492
+ − 797
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
431
+ − 798
}
+ − 799
492
+ − 800
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
431
+ − 801
+ − 802
def erase(r:ARexp): Rexp = r match{
+ − 803
case AZERO => ZERO
+ − 804
case AONE(_) => ONE
+ − 805
case ACHAR(bs, c) => CHAR(c)
+ − 806
case AALTS(bs, Nil) => ZERO
+ − 807
case AALTS(bs, a::Nil) => erase(a)
+ − 808
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 809
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 810
case ASTAR(cs, r)=> STAR(erase(r))
+ − 811
case ANOT(bs, r) => NOT(erase(r))
+ − 812
case AANYCHAR(bs) => ANYCHAR
+ − 813
}
+ − 814
+ − 815
492
+ − 816
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 817
case CHAR(c) => true
+ − 818
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 819
case _ => false
+ − 820
}
+ − 821
+ − 822
def flattenSeq(r: Rexp) : String = r match {
+ − 823
case CHAR(c) => c.toString
+ − 824
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 825
case _ => throw new Error("flatten unflattenable rexp")
+ − 826
}
431
+ − 827
492
+ − 828
def shortRexpOutput(r: Rexp) : String = r match {
+ − 829
case CHAR(c) => c.toString
+ − 830
case ONE => "1"
+ − 831
case ZERO => "0"
+ − 832
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 833
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 834
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 835
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 836
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 837
//case RTOP => "RTOP"
+ − 838
}
+ − 839
431
+ − 840
492
+ − 841
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 842
case Nil => {
+ − 843
if (bnullable(r)) {
+ − 844
val bits = mkepsBC(r)
+ − 845
bits
+ − 846
}
+ − 847
else
+ − 848
throw new Exception("Not matched")
+ − 849
}
+ − 850
case c::cs => {
+ − 851
val der_res = bder(c,r)
+ − 852
val simp_res = bsimp(der_res)
+ − 853
blex_simp(simp_res, cs)
+ − 854
}
431
+ − 855
}
+ − 856
+ − 857
492
+ − 858
+ − 859
+ − 860
def size(r: Rexp) : Int = r match {
+ − 861
case ZERO => 1
+ − 862
case ONE => 1
+ − 863
case CHAR(_) => 1
+ − 864
case ANYCHAR => 1
+ − 865
case NOT(r0) => 1 + size(r0)
+ − 866
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 867
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 868
case STAR(r) => 1 + size(r)
431
+ − 869
}
+ − 870
492
+ − 871
def asize(a: ARexp) = size(erase(a))
431
+ − 872
+ − 873
//pder related
+ − 874
type Mon = (Char, Rexp)
+ − 875
type Lin = Set[Mon]
+ − 876
+ − 877
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 878
case ZERO => Set()
+ − 879
case ONE => rs
+ − 880
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 881
}
+ − 882
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 883
case ZERO => Set()
+ − 884
case ONE => l
+ − 885
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 886
}
+ − 887
def lf(r: Rexp): Lin = r match {
+ − 888
case ZERO => Set()
+ − 889
case ONE => Set()
+ − 890
case CHAR(f) => {
+ − 891
//val Some(c) = alphabet.find(f)
+ − 892
Set((f, ONE))
+ − 893
}
+ − 894
case ALTS(r1, r2) => {
+ − 895
lf(r1 ) ++ lf(r2)
+ − 896
}
+ − 897
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 898
case SEQ(r1, r2) =>{
+ − 899
if (nullable(r1))
+ − 900
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 901
else
+ − 902
cir_prod(lf(r1), r2)
+ − 903
}
+ − 904
}
+ − 905
def lfs(r: Set[Rexp]): Lin = {
+ − 906
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 907
}
+ − 908
+ − 909
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 910
val lft = lf(t)
+ − 911
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 912
}
+ − 913
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 914
case x::xs => pders(xs, pder(x, t))
+ − 915
case Nil => Set(t)
+ − 916
}
+ − 917
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 918
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 919
case Nil => ts
+ − 920
}
+ − 921
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 922
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 923
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 924
//because they don't include the original regular term before they are pdered.
+ − 925
//now only pderas is fixed.
+ − 926
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 927
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 928
def awidth(r: Rexp) : Int = r match {
+ − 929
case CHAR(c) => 1
+ − 930
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 931
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 932
case ONE => 0
+ − 933
case ZERO => 0
+ − 934
case STAR(r) => awidth(r)
+ − 935
}
+ − 936
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 937
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 938
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 939
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 940
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 941
case ZERO => Set[Rexp]()
+ − 942
case ONE => Set[Rexp]()
+ − 943
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 944
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 945
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 946
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 947
}
+ − 948
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 949
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 950
case Nil => ts
+ − 951
}
+ − 952
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 953
+ − 954
+ − 955
+ − 956
def starPrint(r: Rexp) : Unit = r match {
+ − 957
+ − 958
case SEQ(head, rstar) =>
+ − 959
println(shortRexpOutput(head) ++ "~STARREG")
+ − 960
case STAR(rstar) =>
+ − 961
println("STARREG")
+ − 962
case ALTS(r1, r2) =>
+ − 963
println("(")
+ − 964
starPrint(r1)
+ − 965
println("+")
+ − 966
starPrint(r2)
+ − 967
println(")")
+ − 968
case ZERO => println("0")
+ − 969
+ − 970
}
+ − 971
+ − 972
// @arg(doc = "small tests")
+ − 973
val STARREG = //((( (STAR("aa")) | STAR("aaa") ).%).%)
+ − 974
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 975
+ − 976
// @main
492
+ − 977
+ − 978
+ − 979
431
+ − 980
def small() = {
+ − 981
+ − 982
+ − 983
// println(lexing_simp(NOTREG, prog0))
+ − 984
// val v = lex_simp(NOTREG, prog0.toList)
+ − 985
// println(v)
+ − 986
+ − 987
// val d = (lex_simp(NOTREG, prog0.toList))
+ − 988
// println(d)
+ − 989
val pderSTAR = pderUNIV(STARREG)
+ − 990
+ − 991
val refSize = pderSTAR.map(size(_)).sum
435
+ − 992
// println("different partial derivative terms:")
+ − 993
// pderSTAR.foreach(r => r match {
431
+ − 994
435
+ − 995
// case SEQ(head, rstar) =>
+ − 996
// println(shortRexpOutput(head) ++ "~STARREG")
+ − 997
// case STAR(rstar) =>
+ − 998
// println("STARREG")
431
+ − 999
435
+ − 1000
// }
+ − 1001
// )
+ − 1002
// println("the total number of terms is")
+ − 1003
// //println(refSize)
+ − 1004
// println(pderSTAR.size)
431
+ − 1005
492
+ − 1006
// val A : Rexp= ("c" | (ONE | "b") ~ "d") ~((ONE).%)
+ − 1007
// val B : Rexp = ((ONE).%)
+ − 1008
// val C : Rexp = ("d") ~ ((ONE).%)
+ − 1009
// val PRUNE_REG : Rexp = (C | B | A)
+ − 1010
// val APRUNE_REG = internalise(PRUNE_REG)
+ − 1011
// val program_solution = pruneRexp(APRUNE_REG, breakIntoTerms(PRUNE_REG))
+ − 1012
// println("program executes and gives: as disired!")
+ − 1013
// println(shortRexpOutput(erase(program_solution)))
435
+ − 1014
// val simpedPruneReg = strongBsimp(APRUNE_REG)
+ − 1015
+ − 1016
// println(shortRexpOutput(erase(simpedPruneReg)))
492
+ − 1017
+ − 1018
+ − 1019
for(i <- List(1,2,3,4,10, 100, 400, 841, 900 ) ){// 100, 400, 800, 840, 841, 900
+ − 1020
val prog0 = "a" * i
+ − 1021
//println(s"test: $prog0")
+ − 1022
println(s"testing with $i a's" )
+ − 1023
//val bd = bdersSimp(prog0, STARREG)//DB
+ − 1024
val sbd = bdersStrongRexp(prog0, STARREG)//strongDB
+ − 1025
starPrint(erase(sbd))
+ − 1026
val subTerms = breakIntoTerms(erase(sbd))
+ − 1027
//val subTermsLarge = breakIntoTerms(erase(bd))
431
+ − 1028
492
+ − 1029
println(s"subterms of regex with strongDB: ${subTerms.length}")//, standard DB: ${subTermsLarge.length}")
431
+ − 1030
+ − 1031
+ − 1032
492
+ − 1033
println("the number of distinct subterms for bsimp with strongDB")
+ − 1034
println(subTerms.distinct.size)
+ − 1035
//println(subTermsLarge.distinct.size)
+ − 1036
if(pderSTAR.size > subTerms.length)
+ − 1037
println(s"which is less than or equal to the number of PDER terms: ${pderSTAR.size}")
431
+ − 1038
+ − 1039
492
+ − 1040
// println(shortRexpOutput(erase(sbd)))
+ − 1041
// println(shortRexpOutput(erase(bd)))
431
+ − 1042
492
+ − 1043
println("pdersize, original, strongSimp")
+ − 1044
println(refSize, size(STARREG), asize(sbd))
431
+ − 1045
492
+ − 1046
//val vres = strong_blexing_simp( STARREG, prog0)
+ − 1047
//println(vres)
+ − 1048
}
+ − 1049
// println(vs.length)
+ − 1050
// println(vs)
431
+ − 1051
+ − 1052
+ − 1053
// val prog1 = """read n; write n"""
+ − 1054
// println(s"test: $prog1")
+ − 1055
// println(lexing_simp(WHILE_REGS, prog1))
492
+ − 1056
// val display = ("a"| "ab").%
+ − 1057
// val adisplay = internalise(display)
+ − 1058
// bders_simp( "aaaaa".toList, adisplay)
431
+ − 1059
}
+ − 1060
492
+ − 1061
def generator_test() {
+ − 1062
494
+ − 1063
// test(rexp(7), 1000) { (r: Rexp) =>
+ − 1064
// val ss = stringsFromRexp(r)
+ − 1065
// val boolList = ss.map(s => {
+ − 1066
// val simpVal = simpBlexer(r, s)
+ − 1067
// val strongVal = strongBlexer(r, s)
+ − 1068
// // println(simpVal)
+ − 1069
// // println(strongVal)
+ − 1070
// (simpVal == strongVal) && (simpVal != None)
+ − 1071
// })
+ − 1072
// !boolList.exists(b => b == false)
+ − 1073
// }
+ − 1074
// test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) =>
+ − 1075
// val ss = stringsFromRexp(r)
+ − 1076
// val boolList = ss.map(s => {
+ − 1077
// val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1078
// val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1079
// // println(shortRexpOutput(r))
+ − 1080
// // println(s)
+ − 1081
// // println(strongVal)
+ − 1082
// // println(strongVal5)
+ − 1083
// // (strongVal == strongVal5)
493
+ − 1084
494
+ − 1085
// if(asize(bdStrong5) > asize(bdStrong)){
+ − 1086
// println(s)
+ − 1087
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1088
// println(shortRexpOutput(erase(bdStrong)))
+ − 1089
// }
+ − 1090
// asize(bdStrong5) <= asize(bdStrong)
+ − 1091
// })
+ − 1092
// !boolList.exists(b => b == false)
+ − 1093
// }
+ − 1094
//test(rexp(3), 10000000) { (r: Rexp) =>
+ − 1095
test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),ALTS(ALTS(STAR(CHAR('c')),SEQ(CHAR('a'),CHAR('c'))),ALTS(ZERO,ONE))))), 1) {(r: Rexp) =>
493
+ − 1096
val ss = stringsFromRexp(r)
+ − 1097
val boolList = ss.map(s => {
494
+ − 1098
val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1099
val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1100
val size5 = asize(bdStrong5)
+ − 1101
val size0 = asize(bdStrong)
+ − 1102
println(s)
+ − 1103
println(shortRexpOutput(erase(bdStrong5)))
+ − 1104
println(shortRexpOutput(erase(bdStrong)))
+ − 1105
println(size5, size0)
+ − 1106
// println(s)
+ − 1107
size5 <= size0
493
+ − 1108
})
494
+ − 1109
// println(boolList)
493
+ − 1110
!boolList.exists(b => b == false)
492
+ − 1111
}
493
+ − 1112
+ − 1113
492
+ − 1114
}
+ − 1115
// small()
+ − 1116
generator_test()
493
+ − 1117
+ − 1118
1
+ − 1119
+ − 1120
SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1121
SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1122
STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1123
SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
+ − 1124
+ − 1125
+ − 1126
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1127
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))