518
+ − 1
//Strong Bsimp to obtain Antimirov's cubic bound
+ − 2
431
+ − 3
// A simple lexer inspired by work of Sulzmann & Lu
+ − 4
//==================================================
+ − 5
//
+ − 6
// Call the test cases with
+ − 7
//
+ − 8
// amm lexer.sc small
+ − 9
// amm lexer.sc fib
+ − 10
// amm lexer.sc loops
+ − 11
// amm lexer.sc email
+ − 12
//
+ − 13
// amm lexer.sc all
+ − 14
514
+ − 15
431
+ − 16
+ − 17
// regular expressions including records
+ − 18
abstract class Rexp
+ − 19
case object ZERO extends Rexp
+ − 20
case object ONE extends Rexp
+ − 21
case object ANYCHAR extends Rexp
+ − 22
case class CHAR(c: Char) extends Rexp
+ − 23
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 24
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 25
case class STAR(r: Rexp) extends Rexp
+ − 26
case class RECD(x: String, r: Rexp) extends Rexp
+ − 27
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 28
case class OPTIONAL(r: Rexp) extends Rexp
+ − 29
case class NOT(r: Rexp) extends Rexp
+ − 30
// records for extracting strings or tokens
+ − 31
+ − 32
// values
+ − 33
abstract class Val
492
+ − 34
case object Failure extends Val
431
+ − 35
case object Empty extends Val
+ − 36
case class Chr(c: Char) extends Val
+ − 37
case class Sequ(v1: Val, v2: Val) extends Val
+ − 38
case class Left(v: Val) extends Val
+ − 39
case class Right(v: Val) extends Val
+ − 40
case class Stars(vs: List[Val]) extends Val
+ − 41
case class Rec(x: String, v: Val) extends Val
+ − 42
case class Ntime(vs: List[Val]) extends Val
+ − 43
case class Optionall(v: Val) extends Val
+ − 44
case class Nots(s: String) extends Val
+ − 45
+ − 46
+ − 47
+ − 48
abstract class Bit
+ − 49
case object Z extends Bit
+ − 50
case object S extends Bit
+ − 51
+ − 52
+ − 53
type Bits = List[Bit]
+ − 54
+ − 55
abstract class ARexp
+ − 56
case object AZERO extends ARexp
+ − 57
case class AONE(bs: Bits) extends ARexp
+ − 58
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 59
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 60
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 61
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 62
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 63
case class AANYCHAR(bs: Bits) extends ARexp
+ − 64
514
+ − 65
import scala.util.Try
+ − 66
492
+ − 67
trait Generator[+T] {
+ − 68
self => // an alias for "this"
+ − 69
def generate(): T
+ − 70
+ − 71
def gen(n: Int) : List[T] =
+ − 72
if (n == 0) Nil else self.generate() :: gen(n - 1)
+ − 73
+ − 74
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 75
def generate = f(self.generate())
+ − 76
}
+ − 77
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 78
def generate = f(self.generate()).generate()
+ − 79
}
+ − 80
+ − 81
+ − 82
}
+ − 83
+ − 84
// tests a property according to a given random generator
+ − 85
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 86
for (_ <- 0 until amount) {
+ − 87
val value = r.generate()
+ − 88
assert(pred(value), s"Test failed for: $value")
+ − 89
}
+ − 90
println(s"Test passed $amount times")
+ − 91
}
+ − 92
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 93
for (_ <- 0 until amount) {
+ − 94
val valueR = r.generate()
+ − 95
val valueS = s.generate()
+ − 96
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
+ − 97
}
+ − 98
println(s"Test passed $amount times")
+ − 99
}
+ − 100
+ − 101
// random integers
+ − 102
val integers = new Generator[Int] {
+ − 103
val rand = new java.util.Random
+ − 104
def generate() = rand.nextInt()
+ − 105
}
+ − 106
+ − 107
// random booleans
+ − 108
val booleans = integers.map(_ > 0)
+ − 109
+ − 110
// random integers in the range lo and high
+ − 111
def range(lo: Int, hi: Int): Generator[Int] =
+ − 112
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 113
+ − 114
// random characters
+ − 115
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 116
val chars = chars_range('a', 'z')
+ − 117
+ − 118
+ − 119
def oneOf[T](xs: T*): Generator[T] =
+ − 120
for (idx <- range(0, xs.length)) yield xs(idx)
+ − 121
+ − 122
def single[T](x: T) = new Generator[T] {
+ − 123
def generate() = x
+ − 124
}
+ − 125
+ − 126
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 127
for (x <- t; y <- u) yield (x, y)
+ − 128
+ − 129
// lists
+ − 130
def emptyLists = single(Nil)
+ − 131
+ − 132
def nonEmptyLists : Generator[List[Int]] =
+ − 133
for (head <- integers; tail <- lists) yield head :: tail
+ − 134
+ − 135
def lists: Generator[List[Int]] = for {
+ − 136
kind <- booleans
+ − 137
list <- if (kind) emptyLists else nonEmptyLists
+ − 138
} yield list
+ − 139
+ − 140
def char_list(len: Int): Generator[List[Char]] = {
+ − 141
if(len <= 0) single(Nil)
+ − 142
else{
+ − 143
for {
500
+ − 144
c <- chars_range('a', 'c')
492
+ − 145
tail <- char_list(len - 1)
+ − 146
} yield c :: tail
+ − 147
}
+ − 148
}
+ − 149
+ − 150
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 151
493
+ − 152
def sampleString(r: Rexp) : List[String] = r match {
+ − 153
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 154
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 155
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 156
case ONE => "" :: Nil
+ − 157
case ZERO => Nil
+ − 158
case CHAR(c) => c.toString :: Nil
+ − 159
+ − 160
}
+ − 161
+ − 162
def stringsFromRexp(r: Rexp) : List[String] =
+ − 163
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 164
492
+ − 165
+ − 166
// (simple) binary trees
+ − 167
trait Tree[T]
+ − 168
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 169
case class Leaf[T](x: T) extends Tree[T]
+ − 170
+ − 171
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 172
for (x <- t) yield Leaf(x)
+ − 173
+ − 174
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 175
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 176
+ − 177
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 178
for (kind <- range(0, 2);
+ − 179
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 180
+ − 181
// regular expressions
+ − 182
+ − 183
// generates random leaf-regexes; prefers CHAR-regexes
+ − 184
def leaf_rexp() : Generator[Rexp] =
+ − 185
for (kind <- range(0, 5);
+ − 186
c <- chars_range('a', 'd')) yield
+ − 187
kind match {
+ − 188
case 0 => ZERO
+ − 189
case 1 => ONE
+ − 190
case _ => CHAR(c)
+ − 191
}
+ − 192
+ − 193
// generates random inner regexes with maximum depth d
+ − 194
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 195
for (kind <- range(0, 3);
+ − 196
l <- rexp(d);
+ − 197
r <- rexp(d))
+ − 198
yield kind match {
+ − 199
case 0 => ALTS(l, r)
+ − 200
case 1 => SEQ(l, r)
+ − 201
case 2 => STAR(r)
+ − 202
}
+ − 203
+ − 204
// generates random regexes with maximum depth d;
+ − 205
// prefers inner regexes in 2/3 of the cases
+ − 206
def rexp(d: Int = 100): Generator[Rexp] =
+ − 207
for (kind <- range(0, 3);
+ − 208
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 209
+ − 210
+ − 211
// some test functions for rexps
+ − 212
def height(r: Rexp) : Int = r match {
+ − 213
case ZERO => 1
+ − 214
case ONE => 1
+ − 215
case CHAR(_) => 1
+ − 216
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 217
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 218
case STAR(r) => 1 + height(r)
+ − 219
}
+ − 220
+ − 221
// def size(r: Rexp) : Int = r match {
+ − 222
// case ZERO => 1
+ − 223
// case ONE => 1
+ − 224
// case CHAR(_) => 1
+ − 225
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 226
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 227
// case STAR(r) => 1 + size(r)
+ − 228
// }
+ − 229
+ − 230
// randomly subtracts 1 or 2 from the STAR case
+ − 231
def size_faulty(r: Rexp) : Int = r match {
+ − 232
case ZERO => 1
+ − 233
case ONE => 1
+ − 234
case CHAR(_) => 1
+ − 235
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 236
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 237
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 238
}
+ − 239
431
+ − 240
+ − 241
+ − 242
// some convenience for typing in regular expressions
+ − 243
+ − 244
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 245
case Nil => ONE
+ − 246
case c::Nil => CHAR(c)
+ − 247
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 248
}
+ − 249
implicit def string2rexp(s : String) : Rexp =
+ − 250
charlist2rexp(s.toList)
+ − 251
+ − 252
implicit def RexpOps(r: Rexp) = new {
+ − 253
def | (s: Rexp) = ALTS(r, s)
+ − 254
def % = STAR(r)
+ − 255
def ~ (s: Rexp) = SEQ(r, s)
+ − 256
}
+ − 257
+ − 258
implicit def stringOps(s: String) = new {
+ − 259
def | (r: Rexp) = ALTS(s, r)
+ − 260
def | (r: String) = ALTS(s, r)
+ − 261
def % = STAR(s)
+ − 262
def ~ (r: Rexp) = SEQ(s, r)
+ − 263
def ~ (r: String) = SEQ(s, r)
+ − 264
def $ (r: Rexp) = RECD(s, r)
+ − 265
}
+ − 266
+ − 267
def nullable(r: Rexp) : Boolean = r match {
+ − 268
case ZERO => false
+ − 269
case ONE => true
+ − 270
case CHAR(_) => false
+ − 271
case ANYCHAR => false
+ − 272
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 273
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 274
case STAR(_) => true
+ − 275
case RECD(_, r1) => nullable(r1)
+ − 276
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 277
case OPTIONAL(r) => true
+ − 278
case NOT(r) => !nullable(r)
+ − 279
}
+ − 280
+ − 281
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 282
case ZERO => ZERO
+ − 283
case ONE => ZERO
+ − 284
case CHAR(d) => if (c == d) ONE else ZERO
+ − 285
case ANYCHAR => ONE
+ − 286
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 287
case SEQ(r1, r2) =>
+ − 288
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 289
else SEQ(der(c, r1), r2)
+ − 290
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 291
case RECD(_, r1) => der(c, r1)
+ − 292
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 293
case OPTIONAL(r) => der(c, r)
+ − 294
case NOT(r) => NOT(der(c, r))
+ − 295
}
+ − 296
+ − 297
+ − 298
// extracts a string from a value
+ − 299
def flatten(v: Val) : String = v match {
+ − 300
case Empty => ""
+ − 301
case Chr(c) => c.toString
+ − 302
case Left(v) => flatten(v)
+ − 303
case Right(v) => flatten(v)
+ − 304
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 305
case Stars(vs) => vs.map(flatten).mkString
+ − 306
case Ntime(vs) => vs.map(flatten).mkString
+ − 307
case Optionall(v) => flatten(v)
+ − 308
case Rec(_, v) => flatten(v)
+ − 309
}
+ − 310
+ − 311
+ − 312
// extracts an environment from a value;
+ − 313
// used for tokenising a string
+ − 314
def env(v: Val) : List[(String, String)] = v match {
+ − 315
case Empty => Nil
+ − 316
case Chr(c) => Nil
+ − 317
case Left(v) => env(v)
+ − 318
case Right(v) => env(v)
+ − 319
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 320
case Stars(vs) => vs.flatMap(env)
+ − 321
case Ntime(vs) => vs.flatMap(env)
+ − 322
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 323
case Optionall(v) => env(v)
+ − 324
case Nots(s) => ("Negative", s) :: Nil
+ − 325
}
+ − 326
+ − 327
+ − 328
// The injection and mkeps part of the lexer
+ − 329
//===========================================
+ − 330
+ − 331
def mkeps(r: Rexp) : Val = r match {
+ − 332
case ONE => Empty
+ − 333
case ALTS(r1, r2) =>
+ − 334
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 335
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 336
case STAR(r) => Stars(Nil)
+ − 337
case RECD(x, r) => Rec(x, mkeps(r))
+ − 338
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 339
case OPTIONAL(r) => Optionall(Empty)
+ − 340
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 341
else Nots("")//Nots(s.reverse.toString)
+ − 342
// case NOT(ZERO) => Empty
+ − 343
// case NOT(CHAR(c)) => Empty
+ − 344
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 345
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 346
// case NOT(STAR(r)) => Stars(Nil)
+ − 347
+ − 348
}
+ − 349
+ − 350
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 351
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 352
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 353
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 354
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 355
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 356
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 357
case (CHAR(d), Empty) => Chr(c)
+ − 358
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 359
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 360
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 361
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 362
case (ANYCHAR, Empty) => Chr(c)
+ − 363
}
+ − 364
+ − 365
// some "rectification" functions for simplification
+ − 366
+ − 367
+ − 368
+ − 369
+ − 370
// The Lexing Rules for the WHILE Language
+ − 371
+ − 372
// bnullable function: tests whether the aregular
+ − 373
// expression can recognise the empty string
+ − 374
def bnullable (r: ARexp) : Boolean = r match {
+ − 375
case AZERO => false
+ − 376
case AONE(_) => true
+ − 377
case ACHAR(_,_) => false
+ − 378
case AALTS(_, rs) => rs.exists(bnullable)
+ − 379
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 380
case ASTAR(_, _) => true
+ − 381
case ANOT(_, rn) => !bnullable(rn)
+ − 382
}
+ − 383
+ − 384
def mkepsBC(r: ARexp) : Bits = r match {
+ − 385
case AONE(bs) => bs
+ − 386
case AALTS(bs, rs) => {
+ − 387
val n = rs.indexWhere(bnullable)
+ − 388
bs ++ mkepsBC(rs(n))
+ − 389
}
+ − 390
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 391
case ASTAR(bs, r) => bs ++ List(Z)
+ − 392
case ANOT(bs, rn) => bs
+ − 393
}
+ − 394
+ − 395
+ − 396
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 397
case AZERO => AZERO
+ − 398
case AONE(_) => AZERO
+ − 399
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 400
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 401
case ASEQ(bs, r1, r2) =>
+ − 402
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 403
else ASEQ(bs, bder(c, r1), r2)
+ − 404
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 405
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 406
case AANYCHAR(bs) => AONE(bs)
+ − 407
}
+ − 408
+ − 409
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 410
case AZERO => AZERO
+ − 411
case AONE(cs) => AONE(bs ++ cs)
+ − 412
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 413
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 414
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 415
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 416
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 417
}
+ − 418
+ − 419
+ − 420
def internalise(r: Rexp) : ARexp = r match {
+ − 421
case ZERO => AZERO
+ − 422
case ONE => AONE(Nil)
+ − 423
case CHAR(c) => ACHAR(Nil, c)
+ − 424
//case PRED(f) => APRED(Nil, f)
+ − 425
case ALTS(r1, r2) =>
+ − 426
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 427
// case ALTS(r1::rs) => {
+ − 428
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 429
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 430
// }
+ − 431
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 432
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 433
case RECD(x, r) => internalise(r)
+ − 434
case NOT(r) => ANOT(Nil, internalise(r))
+ − 435
case ANYCHAR => AANYCHAR(Nil)
+ − 436
}
+ − 437
+ − 438
+ − 439
def bsimp(r: ARexp): ARexp =
+ − 440
{
+ − 441
r match {
+ − 442
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 443
case (AZERO, _) => AZERO
+ − 444
case (_, AZERO) => AZERO
+ − 445
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 446
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 447
}
+ − 448
case AALTS(bs1, rs) => {
+ − 449
val rs_simp = rs.map(bsimp(_))
+ − 450
val flat_res = flats(rs_simp)
+ − 451
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 452
dist_res match {
+ − 453
case Nil => AZERO
+ − 454
case s :: Nil => fuse(bs1, s)
+ − 455
case rs => AALTS(bs1, rs)
+ − 456
}
+ − 457
+ − 458
}
+ − 459
case r => r
+ − 460
}
492
+ − 461
}
+ − 462
def strongBsimp(r: ARexp): ARexp =
+ − 463
{
+ − 464
r match {
+ − 465
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 466
case (AZERO, _) => AZERO
+ − 467
case (_, AZERO) => AZERO
+ − 468
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 469
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
431
+ − 470
}
492
+ − 471
case AALTS(bs1, rs) => {
+ − 472
val rs_simp = rs.map(strongBsimp(_))
+ − 473
val flat_res = flats(rs_simp)
+ − 474
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 475
dist_res match {
+ − 476
case Nil => AZERO
+ − 477
case s :: Nil => fuse(bs1, s)
+ − 478
case rs => AALTS(bs1, rs)
+ − 479
}
+ − 480
+ − 481
}
+ − 482
case r => r
431
+ − 483
}
492
+ − 484
}
431
+ − 485
494
+ − 486
def strongBsimp5(r: ARexp): ARexp =
+ − 487
{
+ − 488
// println("was this called?")
+ − 489
r match {
+ − 490
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
+ − 491
case (AZERO, _) => AZERO
+ − 492
case (_, AZERO) => AZERO
+ − 493
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 494
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 495
}
+ − 496
case AALTS(bs1, rs) => {
+ − 497
// println("alts case")
+ − 498
val rs_simp = rs.map(strongBsimp5(_))
+ − 499
val flat_res = flats(rs_simp)
500
+ − 500
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
518
+ − 501
// var dist2_res = distinctBy5(dist_res)
+ − 502
// while(dist_res != dist2_res){
+ − 503
// dist_res = dist2_res
+ − 504
// dist2_res = distinctBy5(dist_res)
+ − 505
// }
+ − 506
(dist_res) match {
+ − 507
case Nil => AZERO
+ − 508
case s :: Nil => fuse(bs1, s)
+ − 509
case rs => AALTS(bs1, rs)
500
+ − 510
}
518
+ − 511
}
+ − 512
case r => r
+ − 513
}
+ − 514
}
+ − 515
+ − 516
def strongBsimp6(r: ARexp): ARexp =
+ − 517
{
+ − 518
// println("was this called?")
+ − 519
r match {
+ − 520
case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
+ − 521
case (AZERO, _) => AZERO
+ − 522
case (_, AZERO) => AZERO
+ − 523
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 524
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 525
}
+ − 526
case AALTS(bs1, rs) => {
+ − 527
// println("alts case")
+ − 528
val rs_simp = rs.map(strongBsimp6(_))
+ − 529
val flat_res = flats(rs_simp)
+ − 530
var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
+ − 531
(dist_res) match {
494
+ − 532
case Nil => AZERO
+ − 533
case s :: Nil => fuse(bs1, s)
+ − 534
case rs => AALTS(bs1, rs)
+ − 535
}
+ − 536
}
+ − 537
case r => r
+ − 538
}
+ − 539
}
+ − 540
492
+ − 541
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 542
case Nil => r
+ − 543
case c::s => bders(s, bder(c, r))
+ − 544
}
431
+ − 545
492
+ − 546
def flats(rs: List[ARexp]): List[ARexp] = rs match {
431
+ − 547
case Nil => Nil
492
+ − 548
case AZERO :: rs1 => flats(rs1)
+ − 549
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 550
case r1 :: rs2 => r1 :: flats(rs2)
431
+ − 551
}
+ − 552
492
+ − 553
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 554
case Nil => Nil
+ − 555
case (x::xs) => {
+ − 556
val res = f(x)
+ − 557
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 558
else x::distinctBy(xs, f, res::acc)
431
+ − 559
}
492
+ − 560
}
431
+ − 561
+ − 562
492
+ − 563
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 564
r match {
+ − 565
case ASEQ(bs, r1, r2) =>
+ − 566
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 567
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 568
})
+ − 569
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 570
pruned match {
+ − 571
case AZERO => AZERO
+ − 572
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 573
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 574
}
+ − 575
+ − 576
case AALTS(bs, rs) =>
+ − 577
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 578
val rsp = rs.map(r =>
+ − 579
pruneRexp(r, allowableTerms)
+ − 580
)
+ − 581
.filter(r => r != AZERO)
+ − 582
rsp match {
+ − 583
case Nil => AZERO
+ − 584
case r1::Nil => fuse(bs, r1)
+ − 585
case rs1 => AALTS(bs, rs1)
+ − 586
}
+ − 587
case r =>
+ − 588
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 589
}
+ − 590
}
+ − 591
+ − 592
def oneSimp(r: Rexp) : Rexp = r match {
+ − 593
case SEQ(ONE, r) => r
+ − 594
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 595
case r => r//assert r != 0
432
+ − 596
492
+ − 597
}
431
+ − 598
+ − 599
492
+ − 600
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 601
case Nil => Nil
+ − 602
case x :: xs =>
+ − 603
//assert(acc.distinct == acc)
+ − 604
val erased = erase(x)
+ − 605
if(acc.contains(erased))
+ − 606
distinctBy4(xs, acc)
+ − 607
else{
+ − 608
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 609
//val xp = pruneRexp(x, addToAcc)
+ − 610
pruneRexp(x, addToAcc) match {
+ − 611
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 612
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 613
}
431
+ − 614
}
492
+ − 615
}
+ − 616
494
+ − 617
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 618
// where
+ − 619
// "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
+ − 620
// case r of (ASEQ bs r1 r2) ⇒
+ − 621
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 622
// (AALTs bs rs) ⇒
+ − 623
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 624
// r ⇒ r
+ − 625
+ − 626
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 627
// case r::Nil => SEQ(r, acc)
+ − 628
// case Nil => acc
+ − 629
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 630
// }
+ − 631
+ − 632
+ − 633
//the "fake" Language interpretation: just concatenates!
+ − 634
def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 635
case Nil => acc
+ − 636
case r :: rs1 =>
500
+ − 637
// if(acc == ONE)
+ − 638
// L(r, rs1)
+ − 639
// else
494
+ − 640
L(SEQ(acc, r), rs1)
+ − 641
}
+ − 642
500
+ − 643
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
526
+ − 644
def rsprint(rs: Iterable[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
+ − 645
500
+ − 646
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 647
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
+ − 648
+ − 649
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 650
// println("pakc")
+ − 651
// println(shortRexpOutput(erase(r)))
+ − 652
// println("acc")
+ − 653
// rsprint(acc)
+ − 654
// println("ctx---------")
+ − 655
// rsprint(ctx)
+ − 656
// println("ctx---------end")
+ − 657
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 658
+ − 659
if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
494
+ − 660
AZERO
+ − 661
}
+ − 662
else{
+ − 663
r match {
+ − 664
case ASEQ(bs, r1, r2) =>
500
+ − 665
(pAKC(acc, r1, erase(r2) :: ctx)) match{
494
+ − 666
case AZERO =>
+ − 667
AZERO
+ − 668
case AONE(bs1) =>
+ − 669
fuse(bs1, r2)
+ − 670
case r1p =>
+ − 671
ASEQ(bs, r1p, r2)
+ − 672
}
+ − 673
case AALTS(bs, rs0) =>
+ − 674
// println("before pruning")
+ − 675
// println(s"ctx is ")
+ − 676
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 677
// println(s"rs0 is ")
+ − 678
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
500
+ − 679
// println(s"acc is ")
+ − 680
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 681
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
494
+ − 682
case Nil =>
+ − 683
// println("after pruning Nil")
+ − 684
AZERO
+ − 685
case r :: Nil =>
+ − 686
// println("after pruning singleton")
+ − 687
// println(shortRexpOutput(erase(r)))
+ − 688
r
+ − 689
case rs0p =>
+ − 690
// println("after pruning non-singleton")
+ − 691
AALTS(bs, rs0p)
+ − 692
}
+ − 693
case r => r
+ − 694
}
+ − 695
}
+ − 696
}
+ − 697
+ − 698
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 699
case Nil =>
+ − 700
Nil
+ − 701
case x :: xs => {
+ − 702
val erased = erase(x)
+ − 703
if(acc.contains(erased)){
+ − 704
distinctBy5(xs, acc)
+ − 705
}
+ − 706
else{
+ − 707
val pruned = pAKC(acc, x, Nil)
+ − 708
val newTerms = breakIntoTerms(erase(pruned))
+ − 709
pruned match {
+ − 710
case AZERO =>
+ − 711
distinctBy5(xs, acc)
+ − 712
case xPrime =>
+ − 713
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 714
}
+ − 715
}
+ − 716
}
+ − 717
}
+ − 718
526
+ − 719
def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match {
+ − 720
case SEQ(r1, r2) => if(nullable(r1))
+ − 721
strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) :::
+ − 722
strongBreakIntoTerms(r2)
+ − 723
else
+ − 724
strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 725
case ALTS(r1, r2) => strongBreakIntoTerms(r1) ::: strongBreakIntoTerms(r2)
+ − 726
case ZERO => Nil
+ − 727
case _ => r :: Nil
+ − 728
}
+ − 729
+ − 730
def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
+ − 731
val res = strongBreakIntoTerms((L(erase(r), ctx))).map(oneSimp)
+ − 732
res.toSet
518
+ − 733
}
+ − 734
+ − 735
def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
526
+ − 736
518
+ − 737
inclusionPred(f(a, b), c)
+ − 738
}
526
+ − 739
def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = {
+ − 740
// println("r+ctx---------")
+ − 741
// rsprint(rs1)
+ − 742
// println("acc---------")
+ − 743
// rsprint(rs2)
+ − 744
+ − 745
val res = rs1.forall(rs2.contains)
+ − 746
// println(res)
+ − 747
// println("end------------------")
+ − 748
res
518
+ − 749
}
526
+ − 750
def pAKC6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 751
// println("pakc--------r")
518
+ − 752
// println(shortRexpOutput(erase(r)))
526
+ − 753
// println("ctx---------")
+ − 754
// rsprint(ctx)
+ − 755
// println("pakc-------acc")
518
+ − 756
// rsprint(acc)
526
+ − 757
// println("r+ctx broken down---------")
518
+ − 758
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 759
+ − 760
// rprint(L(erase(r), ctx))
+ − 761
//breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
+ − 762
if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
+ − 763
//println("included in acc!!!")
+ − 764
AZERO
+ − 765
}
+ − 766
else{
+ − 767
r match {
+ − 768
case ASEQ(bs, r1, r2) =>
+ − 769
(pAKC6(acc, r1, erase(r2) :: ctx)) match{
+ − 770
case AZERO =>
+ − 771
AZERO
+ − 772
case AONE(bs1) =>
+ − 773
fuse(bs1, r2)
+ − 774
case r1p =>
+ − 775
ASEQ(bs, r1p, r2)
+ − 776
}
+ − 777
case AALTS(bs, rs0) =>
+ − 778
// println("before pruning")
+ − 779
// println(s"ctx is ")
+ − 780
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 781
// println(s"rs0 is ")
+ − 782
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 783
// println(s"acc is ")
+ − 784
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 785
rs0.map(r => pAKC6(acc, r, ctx)).filter(_ != AZERO) match {
+ − 786
case Nil =>
+ − 787
// println("after pruning Nil")
+ − 788
AZERO
+ − 789
case r :: Nil =>
+ − 790
// println("after pruning singleton")
+ − 791
// println(shortRexpOutput(erase(r)))
+ − 792
r
+ − 793
case rs0p =>
+ − 794
// println("after pruning non-singleton")
+ − 795
AALTS(bs, rs0p)
+ − 796
}
+ − 797
case r => r
+ − 798
}
+ − 799
}
+ − 800
}
+ − 801
+ − 802
526
+ − 803
def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) : List[ARexp] = xs match {
518
+ − 804
case Nil =>
+ − 805
Nil
+ − 806
case x :: xs => {
+ − 807
val erased = erase(x)
+ − 808
if(acc.contains(erased)){
+ − 809
distinctBy6(xs, acc)
+ − 810
}
+ − 811
else{
+ − 812
val pruned = pAKC6(acc, x, Nil)
526
+ − 813
val newTerms = strongBreakIntoTerms(erase(pruned))
518
+ − 814
pruned match {
+ − 815
case AZERO =>
+ − 816
distinctBy6(xs, acc)
+ − 817
case xPrime =>
526
+ − 818
xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
518
+ − 819
}
+ − 820
}
+ − 821
}
+ − 822
}
431
+ − 823
492
+ − 824
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 825
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 826
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
494
+ − 827
case ZERO => Nil
492
+ − 828
case _ => r::Nil
+ − 829
}
431
+ − 830
+ − 831
+ − 832
492
+ − 833
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 834
case (ONE, bs) => (Empty, bs)
+ − 835
case (CHAR(f), bs) => (Chr(f), bs)
+ − 836
case (ALTS(r1, r2), Z::bs1) => {
+ − 837
val (v, bs2) = decode_aux(r1, bs1)
+ − 838
(Left(v), bs2)
+ − 839
}
+ − 840
case (ALTS(r1, r2), S::bs1) => {
+ − 841
val (v, bs2) = decode_aux(r2, bs1)
+ − 842
(Right(v), bs2)
+ − 843
}
+ − 844
case (SEQ(r1, r2), bs) => {
+ − 845
val (v1, bs1) = decode_aux(r1, bs)
+ − 846
val (v2, bs2) = decode_aux(r2, bs1)
+ − 847
(Sequ(v1, v2), bs2)
+ − 848
}
+ − 849
case (STAR(r1), S::bs) => {
+ − 850
val (v, bs1) = decode_aux(r1, bs)
494
+ − 851
//(v)
492
+ − 852
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 853
(Stars(v::vs), bs2)
+ − 854
}
+ − 855
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 856
case (RECD(x, r1), bs) => {
+ − 857
val (v, bs1) = decode_aux(r1, bs)
+ − 858
(Rec(x, v), bs1)
+ − 859
}
+ − 860
case (NOT(r), bs) => (Nots(r.toString), bs)
431
+ − 861
}
+ − 862
492
+ − 863
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 864
case (v, Nil) => v
+ − 865
case _ => throw new Exception("Not decodable")
+ − 866
}
+ − 867
+ − 868
+ − 869
431
+ − 870
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 871
val bit_code = blex_simp(internalise(r), s.toList)
+ − 872
decode(r, bit_code)
492
+ − 873
}
+ − 874
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 875
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 876
}
431
+ − 877
492
+ − 878
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 879
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 880
}
+ − 881
494
+ − 882
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 883
decode(r, strong_blex_simp5(internalise(r), s.toList))
+ − 884
}
+ − 885
+ − 886
492
+ − 887
def strongBlexer(r: Rexp, s: String) : Val = {
+ − 888
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 889
}
431
+ − 890
494
+ − 891
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 892
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 893
}
+ − 894
492
+ − 895
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 896
case Nil => {
+ − 897
if (bnullable(r)) {
+ − 898
//println(asize(r))
+ − 899
val bits = mkepsBC(r)
+ − 900
bits
431
+ − 901
}
492
+ − 902
else
+ − 903
throw new Exception("Not matched")
431
+ − 904
}
492
+ − 905
case c::cs => {
+ − 906
val der_res = bder(c,r)
+ − 907
val simp_res = strongBsimp(der_res)
+ − 908
strong_blex_simp(simp_res, cs)
+ − 909
}
+ − 910
}
431
+ − 911
494
+ − 912
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 913
case Nil => {
+ − 914
if (bnullable(r)) {
+ − 915
//println(asize(r))
+ − 916
val bits = mkepsBC(r)
+ − 917
bits
+ − 918
}
+ − 919
else
+ − 920
throw new Exception("Not matched")
+ − 921
}
+ − 922
case c::cs => {
+ − 923
val der_res = bder(c,r)
+ − 924
val simp_res = strongBsimp5(der_res)
+ − 925
strong_blex_simp5(simp_res, cs)
+ − 926
}
+ − 927
}
431
+ − 928
+ − 929
+ − 930
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 931
case Nil => r
435
+ − 932
case c::s =>
494
+ − 933
//println(erase(r))
435
+ − 934
bders_simp(s, bsimp(bder(c, r)))
431
+ − 935
}
+ − 936
494
+ − 937
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 938
case Nil => r
+ − 939
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 940
}
518
+ − 941
def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
+ − 942
case Nil => r
+ − 943
case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
+ − 944
}
431
+ − 945
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 946
492
+ − 947
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
431
+ − 948
case Nil => r
492
+ − 949
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
431
+ − 950
}
+ − 951
492
+ − 952
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
431
+ − 953
+ − 954
def erase(r:ARexp): Rexp = r match{
+ − 955
case AZERO => ZERO
+ − 956
case AONE(_) => ONE
+ − 957
case ACHAR(bs, c) => CHAR(c)
+ − 958
case AALTS(bs, Nil) => ZERO
+ − 959
case AALTS(bs, a::Nil) => erase(a)
+ − 960
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 961
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 962
case ASTAR(cs, r)=> STAR(erase(r))
+ − 963
case ANOT(bs, r) => NOT(erase(r))
+ − 964
case AANYCHAR(bs) => ANYCHAR
+ − 965
}
+ − 966
+ − 967
492
+ − 968
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 969
case CHAR(c) => true
+ − 970
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 971
case _ => false
+ − 972
}
+ − 973
+ − 974
def flattenSeq(r: Rexp) : String = r match {
+ − 975
case CHAR(c) => c.toString
+ − 976
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 977
case _ => throw new Error("flatten unflattenable rexp")
+ − 978
}
431
+ − 979
492
+ − 980
def shortRexpOutput(r: Rexp) : String = r match {
+ − 981
case CHAR(c) => c.toString
+ − 982
case ONE => "1"
+ − 983
case ZERO => "0"
+ − 984
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 985
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 986
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 987
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 988
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 989
//case RTOP => "RTOP"
+ − 990
}
+ − 991
431
+ − 992
492
+ − 993
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 994
case Nil => {
+ − 995
if (bnullable(r)) {
+ − 996
val bits = mkepsBC(r)
+ − 997
bits
+ − 998
}
+ − 999
else
+ − 1000
throw new Exception("Not matched")
+ − 1001
}
+ − 1002
case c::cs => {
+ − 1003
val der_res = bder(c,r)
+ − 1004
val simp_res = bsimp(der_res)
+ − 1005
blex_simp(simp_res, cs)
+ − 1006
}
431
+ − 1007
}
+ − 1008
+ − 1009
492
+ − 1010
+ − 1011
+ − 1012
def size(r: Rexp) : Int = r match {
+ − 1013
case ZERO => 1
+ − 1014
case ONE => 1
+ − 1015
case CHAR(_) => 1
+ − 1016
case ANYCHAR => 1
+ − 1017
case NOT(r0) => 1 + size(r0)
+ − 1018
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 1019
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 1020
case STAR(r) => 1 + size(r)
431
+ − 1021
}
+ − 1022
492
+ − 1023
def asize(a: ARexp) = size(erase(a))
431
+ − 1024
+ − 1025
//pder related
+ − 1026
type Mon = (Char, Rexp)
+ − 1027
type Lin = Set[Mon]
+ − 1028
+ − 1029
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 1030
case ZERO => Set()
+ − 1031
case ONE => rs
+ − 1032
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 1033
}
+ − 1034
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 1035
case ZERO => Set()
+ − 1036
case ONE => l
526
+ − 1037
case t => l.map( m => m._2 match
+ − 1038
{
+ − 1039
case ZERO => m
+ − 1040
case ONE => (m._1, t)
+ − 1041
case p => (m._1, SEQ(p, t))
+ − 1042
}
+ − 1043
+ − 1044
)
431
+ − 1045
}
+ − 1046
def lf(r: Rexp): Lin = r match {
+ − 1047
case ZERO => Set()
+ − 1048
case ONE => Set()
+ − 1049
case CHAR(f) => {
+ − 1050
//val Some(c) = alphabet.find(f)
+ − 1051
Set((f, ONE))
+ − 1052
}
+ − 1053
case ALTS(r1, r2) => {
+ − 1054
lf(r1 ) ++ lf(r2)
+ − 1055
}
+ − 1056
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 1057
case SEQ(r1, r2) =>{
+ − 1058
if (nullable(r1))
+ − 1059
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 1060
else
+ − 1061
cir_prod(lf(r1), r2)
+ − 1062
}
+ − 1063
}
+ − 1064
def lfs(r: Set[Rexp]): Lin = {
+ − 1065
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 1066
}
+ − 1067
+ − 1068
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 1069
val lft = lf(t)
+ − 1070
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 1071
}
+ − 1072
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 1073
case x::xs => pders(xs, pder(x, t))
+ − 1074
case Nil => Set(t)
+ − 1075
}
+ − 1076
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 1077
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1078
case Nil => ts
+ − 1079
}
526
+ − 1080
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] =
+ − 1081
ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
431
+ − 1082
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 1083
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 1084
//because they don't include the original regular term before they are pdered.
+ − 1085
//now only pderas is fixed.
526
+ − 1086
def pderas(t: Set[Rexp], d: Int): Set[Rexp] =
+ − 1087
if(d > 0)
+ − 1088
pderas(lfs(t).map(mon => mon._2), d - 1) ++ t
+ − 1089
else
+ − 1090
lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
431
+ − 1091
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 1092
def awidth(r: Rexp) : Int = r match {
+ − 1093
case CHAR(c) => 1
+ − 1094
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 1095
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 1096
case ONE => 0
+ − 1097
case ZERO => 0
+ − 1098
case STAR(r) => awidth(r)
+ − 1099
}
+ − 1100
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 1101
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 1102
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 1103
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 1104
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 1105
case ZERO => Set[Rexp]()
+ − 1106
case ONE => Set[Rexp]()
+ − 1107
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 1108
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 1109
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 1110
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 1111
}
+ − 1112
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 1113
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1114
case Nil => ts
+ − 1115
}
+ − 1116
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 1117
+ − 1118
+ − 1119
+ − 1120
def starPrint(r: Rexp) : Unit = r match {
+ − 1121
+ − 1122
case SEQ(head, rstar) =>
+ − 1123
println(shortRexpOutput(head) ++ "~STARREG")
+ − 1124
case STAR(rstar) =>
+ − 1125
println("STARREG")
+ − 1126
case ALTS(r1, r2) =>
+ − 1127
println("(")
+ − 1128
starPrint(r1)
+ − 1129
println("+")
+ − 1130
starPrint(r2)
+ − 1131
println(")")
+ − 1132
case ZERO => println("0")
+ − 1133
}
+ − 1134
+ − 1135
// @arg(doc = "small tests")
516
+ − 1136
def n_astar_list(d: Int) : Rexp = {
+ − 1137
if(d == 0) STAR("a")
+ − 1138
else ALTS(STAR("a" * d), n_astar_list(d - 1))
+ − 1139
}
+ − 1140
def n_astar_alts(d: Int) : Rexp = d match {
+ − 1141
case 0 => n_astar_list(0)
+ − 1142
case d => STAR(n_astar_list(d))
+ − 1143
// case r1 :: r2 :: Nil => ALTS(r1, r2)
+ − 1144
// case r1 :: Nil => r1
+ − 1145
// case r :: rs => ALTS(r, n_astar_alts(rs))
+ − 1146
// case Nil => throw new Error("should give at least 1 elem")
+ − 1147
}
+ − 1148
def n_astar_aux(d: Int) = {
+ − 1149
if(d == 0) n_astar_alts(0)
+ − 1150
else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
+ − 1151
}
+ − 1152
+ − 1153
def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
+ − 1154
//val STARREG = n_astar(3)
+ − 1155
// ( STAR("a") |
+ − 1156
// ("a" | "aa").% |
+ − 1157
// ( "a" | "aa" | "aaa").%
+ − 1158
// ).%
+ − 1159
//( "a" | "aa" | "aaa" | "aaaa").% |
+ − 1160
//( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").%
431
+ − 1161
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 1162
+ − 1163
// @main
492
+ − 1164
516
+ − 1165
def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
+ − 1166
(a, b) => b * a /
+ − 1167
Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
+ − 1168
}
492
+ − 1169
431
+ − 1170
def small() = {
516
+ − 1171
//val pderSTAR = pderUNIV(STARREG)
492
+ − 1172
516
+ − 1173
//val refSize = pderSTAR.map(size(_)).sum
518
+ − 1174
for(n <- 5 to 5){
516
+ − 1175
val STARREG = n_astar(n)
+ − 1176
val iMax = (lcm((1 to n).toList))
518
+ − 1177
for(i <- 1 to iMax + 2){// 100, 400, 800, 840, 841, 900
516
+ − 1178
val prog0 = "a" * i
+ − 1179
//println(s"test: $prog0")
518
+ − 1180
print(i)
516
+ − 1181
print(" ")
+ − 1182
// print(i)
+ − 1183
// print(" ")
+ − 1184
println(asize(bders_simp(prog0.toList, internalise(STARREG))))
+ − 1185
}
492
+ − 1186
}
431
+ − 1187
}
+ − 1188
492
+ − 1189
def generator_test() {
+ − 1190
494
+ − 1191
// test(rexp(7), 1000) { (r: Rexp) =>
+ − 1192
// val ss = stringsFromRexp(r)
+ − 1193
// val boolList = ss.map(s => {
+ − 1194
// val simpVal = simpBlexer(r, s)
+ − 1195
// val strongVal = strongBlexer(r, s)
+ − 1196
// // println(simpVal)
+ − 1197
// // println(strongVal)
+ − 1198
// (simpVal == strongVal) && (simpVal != None)
+ − 1199
// })
+ − 1200
// !boolList.exists(b => b == false)
+ − 1201
// }
+ − 1202
// test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) =>
+ − 1203
// val ss = stringsFromRexp(r)
+ − 1204
// val boolList = ss.map(s => {
+ − 1205
// val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1206
// val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1207
// // println(shortRexpOutput(r))
+ − 1208
// // println(s)
+ − 1209
// // println(strongVal)
+ − 1210
// // println(strongVal5)
+ − 1211
// // (strongVal == strongVal5)
493
+ − 1212
494
+ − 1213
// if(asize(bdStrong5) > asize(bdStrong)){
+ − 1214
// println(s)
+ − 1215
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1216
// println(shortRexpOutput(erase(bdStrong)))
+ − 1217
// }
+ − 1218
// asize(bdStrong5) <= asize(bdStrong)
+ − 1219
// })
+ − 1220
// !boolList.exists(b => b == false)
+ − 1221
// }
500
+ − 1222
//*** example where bdStrong5 has a smaller size than bdStrong
+ − 1223
// test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) =>
+ − 1224
//*** depth 5 example bdStrong5 larger size than bdStrong
+ − 1225
// test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
+ − 1226
+ − 1227
+ − 1228
+ − 1229
//sanity check from Christian's request
+ − 1230
// val r = ("a" | "ab") ~ ("bc" | "c")
+ − 1231
// val a = internalise(r)
+ − 1232
// val aval = blexing_simp(r, "abc")
+ − 1233
// println(aval)
+ − 1234
+ − 1235
//sample counterexample:(depth 7)
+ − 1236
//STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
+ − 1237
//(depth5)
+ − 1238
//STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
518
+ − 1239
test(rexp(4), 100000) { (r: Rexp) =>
500
+ − 1240
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
493
+ − 1241
val ss = stringsFromRexp(r)
518
+ − 1242
val boolList = ss.filter(s => s != "").map(s => {
+ − 1243
//val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1244
val bdStrong6 = bdersStrong6(s.toList, internalise(r))
+ − 1245
val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
+ − 1246
val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r))
494
+ − 1247
// println(s)
518
+ − 1248
// println(bdStrong6Set.size, pdersSet.size)
+ − 1249
bdStrong6Set.size <= pdersSet.size
493
+ − 1250
})
494
+ − 1251
// println(boolList)
500
+ − 1252
//!boolList.exists(b => b == false)
493
+ − 1253
!boolList.exists(b => b == false)
492
+ − 1254
}
493
+ − 1255
+ − 1256
492
+ − 1257
}
518
+ − 1258
// small()
526
+ − 1259
// generator_test()
493
+ − 1260
518
+ − 1261
def counterexample_check() {
526
+ − 1262
val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
+ − 1263
val s = "ccc"
518
+ − 1264
val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1265
val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1266
val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1267
println("original regex ")
+ − 1268
rprint(r)
+ − 1269
println("after strong bsimp")
+ − 1270
aprint(bdStrong5)
+ − 1271
println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ")
+ − 1272
rsprint(bdStrong5Set)
+ − 1273
println("after pderUNIV")
+ − 1274
rsprint(pdersSet.toList)
+ − 1275
}
+ − 1276
// counterexample_check()
526
+ − 1277
def linform_test() {
+ − 1278
val r = STAR(SEQ(STAR(CHAR('c')), ONE))
+ − 1279
val r_linforms = lf(r)
+ − 1280
println(r_linforms.size)
+ − 1281
}
+ − 1282
linform_test()
516
+ − 1283
// 1
518
+ − 1284
def newStrong_test() {
+ − 1285
val r2 = (CHAR('b') | ONE)
+ − 1286
val r0 = CHAR('d')
+ − 1287
val r1 = (ONE | CHAR('c'))
+ − 1288
val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
+ − 1289
println(s"original regex is: ")
+ − 1290
rprint(expRexp)
+ − 1291
val expSimp5 = strongBsimp5(internalise(expRexp))
+ − 1292
val expSimp6 = strongBsimp6(internalise(expRexp))
+ − 1293
aprint(expSimp5)
+ − 1294
aprint(expSimp6)
+ − 1295
}
+ − 1296
// newStrong_test()
516
+ − 1297
// SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1298
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1299
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1300
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
493
+ − 1301
+ − 1302
+ − 1303
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1304
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))