518
+ − 1
//Strong Bsimp to obtain Antimirov's cubic bound
+ − 2
431
+ − 3
// A simple lexer inspired by work of Sulzmann & Lu
+ − 4
//==================================================
+ − 5
//
+ − 6
// Call the test cases with
+ − 7
//
+ − 8
// amm lexer.sc small
+ − 9
// amm lexer.sc fib
+ − 10
// amm lexer.sc loops
+ − 11
// amm lexer.sc email
+ − 12
//
+ − 13
// amm lexer.sc all
+ − 14
514
+ − 15
431
+ − 16
+ − 17
// regular expressions including records
+ − 18
abstract class Rexp
+ − 19
case object ZERO extends Rexp
+ − 20
case object ONE extends Rexp
+ − 21
case object ANYCHAR extends Rexp
+ − 22
case class CHAR(c: Char) extends Rexp
+ − 23
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 24
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 25
case class STAR(r: Rexp) extends Rexp
+ − 26
case class RECD(x: String, r: Rexp) extends Rexp
+ − 27
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 28
case class OPTIONAL(r: Rexp) extends Rexp
+ − 29
case class NOT(r: Rexp) extends Rexp
+ − 30
// records for extracting strings or tokens
+ − 31
+ − 32
// values
+ − 33
abstract class Val
492
+ − 34
case object Failure extends Val
431
+ − 35
case object Empty extends Val
+ − 36
case class Chr(c: Char) extends Val
+ − 37
case class Sequ(v1: Val, v2: Val) extends Val
+ − 38
case class Left(v: Val) extends Val
+ − 39
case class Right(v: Val) extends Val
+ − 40
case class Stars(vs: List[Val]) extends Val
+ − 41
case class Rec(x: String, v: Val) extends Val
+ − 42
case class Ntime(vs: List[Val]) extends Val
+ − 43
case class Optionall(v: Val) extends Val
+ − 44
case class Nots(s: String) extends Val
+ − 45
+ − 46
+ − 47
+ − 48
abstract class Bit
+ − 49
case object Z extends Bit
+ − 50
case object S extends Bit
+ − 51
+ − 52
+ − 53
type Bits = List[Bit]
+ − 54
+ − 55
abstract class ARexp
+ − 56
case object AZERO extends ARexp
+ − 57
case class AONE(bs: Bits) extends ARexp
+ − 58
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 59
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 60
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 61
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 62
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 63
case class AANYCHAR(bs: Bits) extends ARexp
+ − 64
514
+ − 65
import scala.util.Try
+ − 66
492
+ − 67
trait Generator[+T] {
+ − 68
self => // an alias for "this"
+ − 69
def generate(): T
+ − 70
+ − 71
def gen(n: Int) : List[T] =
+ − 72
if (n == 0) Nil else self.generate() :: gen(n - 1)
+ − 73
+ − 74
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 75
def generate = f(self.generate())
+ − 76
}
+ − 77
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 78
def generate = f(self.generate()).generate()
+ − 79
}
+ − 80
+ − 81
+ − 82
}
+ − 83
+ − 84
// tests a property according to a given random generator
+ − 85
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 86
for (_ <- 0 until amount) {
+ − 87
val value = r.generate()
+ − 88
assert(pred(value), s"Test failed for: $value")
+ − 89
}
+ − 90
println(s"Test passed $amount times")
+ − 91
}
+ − 92
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 93
for (_ <- 0 until amount) {
+ − 94
val valueR = r.generate()
+ − 95
val valueS = s.generate()
+ − 96
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
+ − 97
}
+ − 98
println(s"Test passed $amount times")
+ − 99
}
+ − 100
+ − 101
// random integers
+ − 102
val integers = new Generator[Int] {
+ − 103
val rand = new java.util.Random
+ − 104
def generate() = rand.nextInt()
+ − 105
}
+ − 106
+ − 107
// random booleans
+ − 108
val booleans = integers.map(_ > 0)
+ − 109
+ − 110
// random integers in the range lo and high
+ − 111
def range(lo: Int, hi: Int): Generator[Int] =
+ − 112
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 113
+ − 114
// random characters
+ − 115
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 116
val chars = chars_range('a', 'z')
+ − 117
+ − 118
+ − 119
def oneOf[T](xs: T*): Generator[T] =
+ − 120
for (idx <- range(0, xs.length)) yield xs(idx)
+ − 121
+ − 122
def single[T](x: T) = new Generator[T] {
+ − 123
def generate() = x
+ − 124
}
+ − 125
+ − 126
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 127
for (x <- t; y <- u) yield (x, y)
+ − 128
+ − 129
// lists
+ − 130
def emptyLists = single(Nil)
+ − 131
+ − 132
def nonEmptyLists : Generator[List[Int]] =
+ − 133
for (head <- integers; tail <- lists) yield head :: tail
+ − 134
+ − 135
def lists: Generator[List[Int]] = for {
+ − 136
kind <- booleans
+ − 137
list <- if (kind) emptyLists else nonEmptyLists
+ − 138
} yield list
+ − 139
+ − 140
def char_list(len: Int): Generator[List[Char]] = {
+ − 141
if(len <= 0) single(Nil)
+ − 142
else{
+ − 143
for {
500
+ − 144
c <- chars_range('a', 'c')
492
+ − 145
tail <- char_list(len - 1)
+ − 146
} yield c :: tail
+ − 147
}
+ − 148
}
+ − 149
+ − 150
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 151
493
+ − 152
def sampleString(r: Rexp) : List[String] = r match {
+ − 153
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 154
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 155
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 156
case ONE => "" :: Nil
+ − 157
case ZERO => Nil
+ − 158
case CHAR(c) => c.toString :: Nil
+ − 159
+ − 160
}
+ − 161
+ − 162
def stringsFromRexp(r: Rexp) : List[String] =
+ − 163
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 164
492
+ − 165
+ − 166
// (simple) binary trees
+ − 167
trait Tree[T]
+ − 168
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 169
case class Leaf[T](x: T) extends Tree[T]
+ − 170
+ − 171
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 172
for (x <- t) yield Leaf(x)
+ − 173
+ − 174
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 175
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 176
+ − 177
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 178
for (kind <- range(0, 2);
+ − 179
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 180
+ − 181
// regular expressions
+ − 182
+ − 183
// generates random leaf-regexes; prefers CHAR-regexes
+ − 184
def leaf_rexp() : Generator[Rexp] =
+ − 185
for (kind <- range(0, 5);
+ − 186
c <- chars_range('a', 'd')) yield
+ − 187
kind match {
+ − 188
case 0 => ZERO
+ − 189
case 1 => ONE
+ − 190
case _ => CHAR(c)
+ − 191
}
+ − 192
+ − 193
// generates random inner regexes with maximum depth d
+ − 194
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 195
for (kind <- range(0, 3);
+ − 196
l <- rexp(d);
+ − 197
r <- rexp(d))
+ − 198
yield kind match {
+ − 199
case 0 => ALTS(l, r)
+ − 200
case 1 => SEQ(l, r)
+ − 201
case 2 => STAR(r)
+ − 202
}
+ − 203
+ − 204
// generates random regexes with maximum depth d;
+ − 205
// prefers inner regexes in 2/3 of the cases
+ − 206
def rexp(d: Int = 100): Generator[Rexp] =
+ − 207
for (kind <- range(0, 3);
+ − 208
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 209
+ − 210
+ − 211
// some test functions for rexps
+ − 212
def height(r: Rexp) : Int = r match {
+ − 213
case ZERO => 1
+ − 214
case ONE => 1
+ − 215
case CHAR(_) => 1
+ − 216
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 217
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 218
case STAR(r) => 1 + height(r)
+ − 219
}
+ − 220
+ − 221
// def size(r: Rexp) : Int = r match {
+ − 222
// case ZERO => 1
+ − 223
// case ONE => 1
+ − 224
// case CHAR(_) => 1
+ − 225
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 226
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 227
// case STAR(r) => 1 + size(r)
+ − 228
// }
+ − 229
+ − 230
// randomly subtracts 1 or 2 from the STAR case
+ − 231
def size_faulty(r: Rexp) : Int = r match {
+ − 232
case ZERO => 1
+ − 233
case ONE => 1
+ − 234
case CHAR(_) => 1
+ − 235
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 236
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 237
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 238
}
+ − 239
431
+ − 240
+ − 241
+ − 242
// some convenience for typing in regular expressions
+ − 243
+ − 244
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 245
case Nil => ONE
+ − 246
case c::Nil => CHAR(c)
+ − 247
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 248
}
+ − 249
implicit def string2rexp(s : String) : Rexp =
+ − 250
charlist2rexp(s.toList)
+ − 251
+ − 252
implicit def RexpOps(r: Rexp) = new {
+ − 253
def | (s: Rexp) = ALTS(r, s)
+ − 254
def % = STAR(r)
+ − 255
def ~ (s: Rexp) = SEQ(r, s)
+ − 256
}
+ − 257
+ − 258
implicit def stringOps(s: String) = new {
+ − 259
def | (r: Rexp) = ALTS(s, r)
+ − 260
def | (r: String) = ALTS(s, r)
+ − 261
def % = STAR(s)
+ − 262
def ~ (r: Rexp) = SEQ(s, r)
+ − 263
def ~ (r: String) = SEQ(s, r)
+ − 264
def $ (r: Rexp) = RECD(s, r)
+ − 265
}
+ − 266
+ − 267
def nullable(r: Rexp) : Boolean = r match {
+ − 268
case ZERO => false
+ − 269
case ONE => true
+ − 270
case CHAR(_) => false
+ − 271
case ANYCHAR => false
+ − 272
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 273
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 274
case STAR(_) => true
+ − 275
case RECD(_, r1) => nullable(r1)
+ − 276
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 277
case OPTIONAL(r) => true
+ − 278
case NOT(r) => !nullable(r)
+ − 279
}
+ − 280
+ − 281
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 282
case ZERO => ZERO
+ − 283
case ONE => ZERO
+ − 284
case CHAR(d) => if (c == d) ONE else ZERO
+ − 285
case ANYCHAR => ONE
+ − 286
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 287
case SEQ(r1, r2) =>
+ − 288
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 289
else SEQ(der(c, r1), r2)
+ − 290
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 291
case RECD(_, r1) => der(c, r1)
+ − 292
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 293
case OPTIONAL(r) => der(c, r)
+ − 294
case NOT(r) => NOT(der(c, r))
+ − 295
}
+ − 296
+ − 297
+ − 298
// extracts a string from a value
+ − 299
def flatten(v: Val) : String = v match {
+ − 300
case Empty => ""
+ − 301
case Chr(c) => c.toString
+ − 302
case Left(v) => flatten(v)
+ − 303
case Right(v) => flatten(v)
+ − 304
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 305
case Stars(vs) => vs.map(flatten).mkString
+ − 306
case Ntime(vs) => vs.map(flatten).mkString
+ − 307
case Optionall(v) => flatten(v)
+ − 308
case Rec(_, v) => flatten(v)
+ − 309
}
+ − 310
+ − 311
+ − 312
// extracts an environment from a value;
+ − 313
// used for tokenising a string
+ − 314
def env(v: Val) : List[(String, String)] = v match {
+ − 315
case Empty => Nil
+ − 316
case Chr(c) => Nil
+ − 317
case Left(v) => env(v)
+ − 318
case Right(v) => env(v)
+ − 319
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 320
case Stars(vs) => vs.flatMap(env)
+ − 321
case Ntime(vs) => vs.flatMap(env)
+ − 322
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 323
case Optionall(v) => env(v)
+ − 324
case Nots(s) => ("Negative", s) :: Nil
+ − 325
}
+ − 326
+ − 327
+ − 328
// The injection and mkeps part of the lexer
+ − 329
//===========================================
+ − 330
+ − 331
def mkeps(r: Rexp) : Val = r match {
+ − 332
case ONE => Empty
+ − 333
case ALTS(r1, r2) =>
+ − 334
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 335
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 336
case STAR(r) => Stars(Nil)
+ − 337
case RECD(x, r) => Rec(x, mkeps(r))
+ − 338
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 339
case OPTIONAL(r) => Optionall(Empty)
+ − 340
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 341
else Nots("")//Nots(s.reverse.toString)
+ − 342
// case NOT(ZERO) => Empty
+ − 343
// case NOT(CHAR(c)) => Empty
+ − 344
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 345
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 346
// case NOT(STAR(r)) => Stars(Nil)
+ − 347
+ − 348
}
+ − 349
+ − 350
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 351
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 352
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 353
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 354
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 355
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 356
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 357
case (CHAR(d), Empty) => Chr(c)
+ − 358
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 359
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 360
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 361
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 362
case (ANYCHAR, Empty) => Chr(c)
+ − 363
}
+ − 364
+ − 365
// some "rectification" functions for simplification
+ − 366
+ − 367
+ − 368
+ − 369
+ − 370
// The Lexing Rules for the WHILE Language
+ − 371
+ − 372
// bnullable function: tests whether the aregular
+ − 373
// expression can recognise the empty string
+ − 374
def bnullable (r: ARexp) : Boolean = r match {
+ − 375
case AZERO => false
+ − 376
case AONE(_) => true
+ − 377
case ACHAR(_,_) => false
+ − 378
case AALTS(_, rs) => rs.exists(bnullable)
+ − 379
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 380
case ASTAR(_, _) => true
+ − 381
case ANOT(_, rn) => !bnullable(rn)
+ − 382
}
+ − 383
+ − 384
def mkepsBC(r: ARexp) : Bits = r match {
+ − 385
case AONE(bs) => bs
+ − 386
case AALTS(bs, rs) => {
+ − 387
val n = rs.indexWhere(bnullable)
+ − 388
bs ++ mkepsBC(rs(n))
+ − 389
}
+ − 390
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 391
case ASTAR(bs, r) => bs ++ List(Z)
+ − 392
case ANOT(bs, rn) => bs
+ − 393
}
+ − 394
+ − 395
+ − 396
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 397
case AZERO => AZERO
+ − 398
case AONE(_) => AZERO
+ − 399
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 400
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 401
case ASEQ(bs, r1, r2) =>
+ − 402
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 403
else ASEQ(bs, bder(c, r1), r2)
+ − 404
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 405
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 406
case AANYCHAR(bs) => AONE(bs)
+ − 407
}
+ − 408
+ − 409
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 410
case AZERO => AZERO
+ − 411
case AONE(cs) => AONE(bs ++ cs)
+ − 412
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 413
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 414
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 415
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 416
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 417
}
+ − 418
+ − 419
+ − 420
def internalise(r: Rexp) : ARexp = r match {
+ − 421
case ZERO => AZERO
+ − 422
case ONE => AONE(Nil)
+ − 423
case CHAR(c) => ACHAR(Nil, c)
+ − 424
//case PRED(f) => APRED(Nil, f)
+ − 425
case ALTS(r1, r2) =>
+ − 426
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 427
// case ALTS(r1::rs) => {
+ − 428
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 429
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 430
// }
+ − 431
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 432
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 433
case RECD(x, r) => internalise(r)
+ − 434
case NOT(r) => ANOT(Nil, internalise(r))
+ − 435
case ANYCHAR => AANYCHAR(Nil)
+ − 436
}
+ − 437
+ − 438
+ − 439
def bsimp(r: ARexp): ARexp =
+ − 440
{
+ − 441
r match {
+ − 442
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 443
case (AZERO, _) => AZERO
+ − 444
case (_, AZERO) => AZERO
+ − 445
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 446
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 447
}
+ − 448
case AALTS(bs1, rs) => {
+ − 449
val rs_simp = rs.map(bsimp(_))
+ − 450
val flat_res = flats(rs_simp)
+ − 451
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 452
dist_res match {
+ − 453
case Nil => AZERO
+ − 454
case s :: Nil => fuse(bs1, s)
+ − 455
case rs => AALTS(bs1, rs)
+ − 456
}
+ − 457
+ − 458
}
+ − 459
case r => r
+ − 460
}
492
+ − 461
}
+ − 462
def strongBsimp(r: ARexp): ARexp =
+ − 463
{
+ − 464
r match {
+ − 465
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 466
case (AZERO, _) => AZERO
+ − 467
case (_, AZERO) => AZERO
+ − 468
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 469
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
431
+ − 470
}
492
+ − 471
case AALTS(bs1, rs) => {
+ − 472
val rs_simp = rs.map(strongBsimp(_))
+ − 473
val flat_res = flats(rs_simp)
+ − 474
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 475
dist_res match {
+ − 476
case Nil => AZERO
+ − 477
case s :: Nil => fuse(bs1, s)
+ − 478
case rs => AALTS(bs1, rs)
+ − 479
}
+ − 480
+ − 481
}
+ − 482
case r => r
431
+ − 483
}
492
+ − 484
}
431
+ − 485
494
+ − 486
def strongBsimp5(r: ARexp): ARexp =
+ − 487
{
+ − 488
// println("was this called?")
+ − 489
r match {
+ − 490
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
+ − 491
case (AZERO, _) => AZERO
+ − 492
case (_, AZERO) => AZERO
+ − 493
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 494
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 495
}
+ − 496
case AALTS(bs1, rs) => {
+ − 497
// println("alts case")
+ − 498
val rs_simp = rs.map(strongBsimp5(_))
+ − 499
val flat_res = flats(rs_simp)
500
+ − 500
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
518
+ − 501
// var dist2_res = distinctBy5(dist_res)
+ − 502
// while(dist_res != dist2_res){
+ − 503
// dist_res = dist2_res
+ − 504
// dist2_res = distinctBy5(dist_res)
+ − 505
// }
+ − 506
(dist_res) match {
+ − 507
case Nil => AZERO
+ − 508
case s :: Nil => fuse(bs1, s)
+ − 509
case rs => AALTS(bs1, rs)
500
+ − 510
}
518
+ − 511
}
+ − 512
case r => r
+ − 513
}
+ − 514
}
+ − 515
+ − 516
def strongBsimp6(r: ARexp): ARexp =
+ − 517
{
+ − 518
// println("was this called?")
+ − 519
r match {
+ − 520
case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
+ − 521
case (AZERO, _) => AZERO
+ − 522
case (_, AZERO) => AZERO
+ − 523
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 524
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 525
}
+ − 526
case AALTS(bs1, rs) => {
+ − 527
// println("alts case")
+ − 528
val rs_simp = rs.map(strongBsimp6(_))
+ − 529
val flat_res = flats(rs_simp)
+ − 530
var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
+ − 531
(dist_res) match {
494
+ − 532
case Nil => AZERO
+ − 533
case s :: Nil => fuse(bs1, s)
+ − 534
case rs => AALTS(bs1, rs)
+ − 535
}
+ − 536
}
+ − 537
case r => r
+ − 538
}
+ − 539
}
+ − 540
492
+ − 541
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 542
case Nil => r
+ − 543
case c::s => bders(s, bder(c, r))
+ − 544
}
431
+ − 545
492
+ − 546
def flats(rs: List[ARexp]): List[ARexp] = rs match {
431
+ − 547
case Nil => Nil
492
+ − 548
case AZERO :: rs1 => flats(rs1)
+ − 549
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 550
case r1 :: rs2 => r1 :: flats(rs2)
431
+ − 551
}
+ − 552
492
+ − 553
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 554
case Nil => Nil
+ − 555
case (x::xs) => {
+ − 556
val res = f(x)
+ − 557
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 558
else x::distinctBy(xs, f, res::acc)
431
+ − 559
}
492
+ − 560
}
431
+ − 561
+ − 562
492
+ − 563
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 564
r match {
+ − 565
case ASEQ(bs, r1, r2) =>
+ − 566
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 567
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 568
})
+ − 569
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 570
pruned match {
+ − 571
case AZERO => AZERO
+ − 572
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 573
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 574
}
+ − 575
+ − 576
case AALTS(bs, rs) =>
+ − 577
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 578
val rsp = rs.map(r =>
+ − 579
pruneRexp(r, allowableTerms)
+ − 580
)
+ − 581
.filter(r => r != AZERO)
+ − 582
rsp match {
+ − 583
case Nil => AZERO
+ − 584
case r1::Nil => fuse(bs, r1)
+ − 585
case rs1 => AALTS(bs, rs1)
+ − 586
}
+ − 587
case r =>
+ − 588
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 589
}
+ − 590
}
+ − 591
+ − 592
def oneSimp(r: Rexp) : Rexp = r match {
+ − 593
case SEQ(ONE, r) => r
+ − 594
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 595
case r => r//assert r != 0
432
+ − 596
492
+ − 597
}
431
+ − 598
+ − 599
492
+ − 600
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 601
case Nil => Nil
+ − 602
case x :: xs =>
+ − 603
//assert(acc.distinct == acc)
+ − 604
val erased = erase(x)
+ − 605
if(acc.contains(erased))
+ − 606
distinctBy4(xs, acc)
+ − 607
else{
+ − 608
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 609
//val xp = pruneRexp(x, addToAcc)
+ − 610
pruneRexp(x, addToAcc) match {
+ − 611
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 612
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 613
}
431
+ − 614
}
492
+ − 615
}
+ − 616
494
+ − 617
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 618
// where
+ − 619
// "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
+ − 620
// case r of (ASEQ bs r1 r2) ⇒
+ − 621
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 622
// (AALTs bs rs) ⇒
+ − 623
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 624
// r ⇒ r
+ − 625
+ − 626
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 627
// case r::Nil => SEQ(r, acc)
+ − 628
// case Nil => acc
+ − 629
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 630
// }
+ − 631
+ − 632
+ − 633
//the "fake" Language interpretation: just concatenates!
+ − 634
def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 635
case Nil => acc
+ − 636
case r :: rs1 =>
500
+ − 637
// if(acc == ONE)
+ − 638
// L(r, rs1)
+ − 639
// else
494
+ − 640
L(SEQ(acc, r), rs1)
+ − 641
}
+ − 642
500
+ − 643
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
+ − 644
def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
+ − 645
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 646
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
+ − 647
+ − 648
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 649
// println("pakc")
+ − 650
// println(shortRexpOutput(erase(r)))
+ − 651
// println("acc")
+ − 652
// rsprint(acc)
+ − 653
// println("ctx---------")
+ − 654
// rsprint(ctx)
+ − 655
// println("ctx---------end")
+ − 656
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 657
+ − 658
if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
494
+ − 659
AZERO
+ − 660
}
+ − 661
else{
+ − 662
r match {
+ − 663
case ASEQ(bs, r1, r2) =>
500
+ − 664
(pAKC(acc, r1, erase(r2) :: ctx)) match{
494
+ − 665
case AZERO =>
+ − 666
AZERO
+ − 667
case AONE(bs1) =>
+ − 668
fuse(bs1, r2)
+ − 669
case r1p =>
+ − 670
ASEQ(bs, r1p, r2)
+ − 671
}
+ − 672
case AALTS(bs, rs0) =>
+ − 673
// println("before pruning")
+ − 674
// println(s"ctx is ")
+ − 675
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 676
// println(s"rs0 is ")
+ − 677
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
500
+ − 678
// println(s"acc is ")
+ − 679
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 680
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
494
+ − 681
case Nil =>
+ − 682
// println("after pruning Nil")
+ − 683
AZERO
+ − 684
case r :: Nil =>
+ − 685
// println("after pruning singleton")
+ − 686
// println(shortRexpOutput(erase(r)))
+ − 687
r
+ − 688
case rs0p =>
+ − 689
// println("after pruning non-singleton")
+ − 690
AALTS(bs, rs0p)
+ − 691
}
+ − 692
case r => r
+ − 693
}
+ − 694
}
+ − 695
}
+ − 696
+ − 697
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 698
case Nil =>
+ − 699
Nil
+ − 700
case x :: xs => {
+ − 701
val erased = erase(x)
+ − 702
if(acc.contains(erased)){
+ − 703
distinctBy5(xs, acc)
+ − 704
}
+ − 705
else{
+ − 706
val pruned = pAKC(acc, x, Nil)
+ − 707
val newTerms = breakIntoTerms(erase(pruned))
+ − 708
pruned match {
+ − 709
case AZERO =>
+ − 710
distinctBy5(xs, acc)
+ − 711
case xPrime =>
+ − 712
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 713
}
+ − 714
}
+ − 715
}
+ − 716
}
+ − 717
518
+ − 718
def attachCtx(r: ARexp, ctx: List[Rexp]) : List[Rexp] = {
+ − 719
val res = breakIntoTerms(oneSimp(L(erase(r), ctx))).map(oneSimp)
+ − 720
res
+ − 721
}
+ − 722
+ − 723
def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, inclusionPred: (C, C) => Boolean) : Boolean = {
+ − 724
inclusionPred(f(a, b), c)
+ − 725
}
+ − 726
def rexpListInclusion(rs1: List[Rexp], rs2: List[Rexp]) : Boolean = {
+ − 727
rs1.forall(rs2.contains)
+ − 728
}
+ − 729
def pAKC6(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 730
// println("pakc")
+ − 731
// println(shortRexpOutput(erase(r)))
+ − 732
// println("acc")
+ − 733
// rsprint(acc)
+ − 734
// println("ctx---------")
+ − 735
// rsprint(ctx)
+ − 736
// println("ctx---------end")
+ − 737
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 738
+ − 739
// rprint(L(erase(r), ctx))
+ − 740
//breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)
+ − 741
if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms
+ − 742
//println("included in acc!!!")
+ − 743
AZERO
+ − 744
}
+ − 745
else{
+ − 746
r match {
+ − 747
case ASEQ(bs, r1, r2) =>
+ − 748
(pAKC6(acc, r1, erase(r2) :: ctx)) match{
+ − 749
case AZERO =>
+ − 750
AZERO
+ − 751
case AONE(bs1) =>
+ − 752
fuse(bs1, r2)
+ − 753
case r1p =>
+ − 754
ASEQ(bs, r1p, r2)
+ − 755
}
+ − 756
case AALTS(bs, rs0) =>
+ − 757
// println("before pruning")
+ − 758
// println(s"ctx is ")
+ − 759
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 760
// println(s"rs0 is ")
+ − 761
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 762
// println(s"acc is ")
+ − 763
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 764
rs0.map(r => pAKC6(acc, r, ctx)).filter(_ != AZERO) match {
+ − 765
case Nil =>
+ − 766
// println("after pruning Nil")
+ − 767
AZERO
+ − 768
case r :: Nil =>
+ − 769
// println("after pruning singleton")
+ − 770
// println(shortRexpOutput(erase(r)))
+ − 771
r
+ − 772
case rs0p =>
+ − 773
// println("after pruning non-singleton")
+ − 774
AALTS(bs, rs0p)
+ − 775
}
+ − 776
case r => r
+ − 777
}
+ − 778
}
+ − 779
}
+ − 780
+ − 781
+ − 782
def distinctBy6(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 783
case Nil =>
+ − 784
Nil
+ − 785
case x :: xs => {
+ − 786
val erased = erase(x)
+ − 787
if(acc.contains(erased)){
+ − 788
distinctBy6(xs, acc)
+ − 789
}
+ − 790
else{
+ − 791
val pruned = pAKC6(acc, x, Nil)
+ − 792
val newTerms = breakIntoTerms(erase(pruned))
+ − 793
pruned match {
+ − 794
case AZERO =>
+ − 795
distinctBy6(xs, acc)
+ − 796
case xPrime =>
+ − 797
xPrime :: distinctBy6(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 798
}
+ − 799
}
+ − 800
}
+ − 801
}
431
+ − 802
492
+ − 803
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 804
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 805
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
494
+ − 806
case ZERO => Nil
492
+ − 807
case _ => r::Nil
+ − 808
}
431
+ − 809
+ − 810
+ − 811
492
+ − 812
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 813
case (ONE, bs) => (Empty, bs)
+ − 814
case (CHAR(f), bs) => (Chr(f), bs)
+ − 815
case (ALTS(r1, r2), Z::bs1) => {
+ − 816
val (v, bs2) = decode_aux(r1, bs1)
+ − 817
(Left(v), bs2)
+ − 818
}
+ − 819
case (ALTS(r1, r2), S::bs1) => {
+ − 820
val (v, bs2) = decode_aux(r2, bs1)
+ − 821
(Right(v), bs2)
+ − 822
}
+ − 823
case (SEQ(r1, r2), bs) => {
+ − 824
val (v1, bs1) = decode_aux(r1, bs)
+ − 825
val (v2, bs2) = decode_aux(r2, bs1)
+ − 826
(Sequ(v1, v2), bs2)
+ − 827
}
+ − 828
case (STAR(r1), S::bs) => {
+ − 829
val (v, bs1) = decode_aux(r1, bs)
494
+ − 830
//(v)
492
+ − 831
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 832
(Stars(v::vs), bs2)
+ − 833
}
+ − 834
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 835
case (RECD(x, r1), bs) => {
+ − 836
val (v, bs1) = decode_aux(r1, bs)
+ − 837
(Rec(x, v), bs1)
+ − 838
}
+ − 839
case (NOT(r), bs) => (Nots(r.toString), bs)
431
+ − 840
}
+ − 841
492
+ − 842
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 843
case (v, Nil) => v
+ − 844
case _ => throw new Exception("Not decodable")
+ − 845
}
+ − 846
+ − 847
+ − 848
431
+ − 849
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 850
val bit_code = blex_simp(internalise(r), s.toList)
+ − 851
decode(r, bit_code)
492
+ − 852
}
+ − 853
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 854
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 855
}
431
+ − 856
492
+ − 857
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 858
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 859
}
+ − 860
494
+ − 861
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 862
decode(r, strong_blex_simp5(internalise(r), s.toList))
+ − 863
}
+ − 864
+ − 865
492
+ − 866
def strongBlexer(r: Rexp, s: String) : Val = {
+ − 867
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 868
}
431
+ − 869
494
+ − 870
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 871
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 872
}
+ − 873
492
+ − 874
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 875
case Nil => {
+ − 876
if (bnullable(r)) {
+ − 877
//println(asize(r))
+ − 878
val bits = mkepsBC(r)
+ − 879
bits
431
+ − 880
}
492
+ − 881
else
+ − 882
throw new Exception("Not matched")
431
+ − 883
}
492
+ − 884
case c::cs => {
+ − 885
val der_res = bder(c,r)
+ − 886
val simp_res = strongBsimp(der_res)
+ − 887
strong_blex_simp(simp_res, cs)
+ − 888
}
+ − 889
}
431
+ − 890
494
+ − 891
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 892
case Nil => {
+ − 893
if (bnullable(r)) {
+ − 894
//println(asize(r))
+ − 895
val bits = mkepsBC(r)
+ − 896
bits
+ − 897
}
+ − 898
else
+ − 899
throw new Exception("Not matched")
+ − 900
}
+ − 901
case c::cs => {
+ − 902
val der_res = bder(c,r)
+ − 903
val simp_res = strongBsimp5(der_res)
+ − 904
strong_blex_simp5(simp_res, cs)
+ − 905
}
+ − 906
}
431
+ − 907
+ − 908
+ − 909
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 910
case Nil => r
435
+ − 911
case c::s =>
494
+ − 912
//println(erase(r))
435
+ − 913
bders_simp(s, bsimp(bder(c, r)))
431
+ − 914
}
+ − 915
494
+ − 916
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 917
case Nil => r
+ − 918
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 919
}
518
+ − 920
def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
+ − 921
case Nil => r
+ − 922
case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
+ − 923
}
431
+ − 924
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 925
492
+ − 926
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
431
+ − 927
case Nil => r
492
+ − 928
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
431
+ − 929
}
+ − 930
492
+ − 931
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
431
+ − 932
+ − 933
def erase(r:ARexp): Rexp = r match{
+ − 934
case AZERO => ZERO
+ − 935
case AONE(_) => ONE
+ − 936
case ACHAR(bs, c) => CHAR(c)
+ − 937
case AALTS(bs, Nil) => ZERO
+ − 938
case AALTS(bs, a::Nil) => erase(a)
+ − 939
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 940
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 941
case ASTAR(cs, r)=> STAR(erase(r))
+ − 942
case ANOT(bs, r) => NOT(erase(r))
+ − 943
case AANYCHAR(bs) => ANYCHAR
+ − 944
}
+ − 945
+ − 946
492
+ − 947
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 948
case CHAR(c) => true
+ − 949
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 950
case _ => false
+ − 951
}
+ − 952
+ − 953
def flattenSeq(r: Rexp) : String = r match {
+ − 954
case CHAR(c) => c.toString
+ − 955
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 956
case _ => throw new Error("flatten unflattenable rexp")
+ − 957
}
431
+ − 958
492
+ − 959
def shortRexpOutput(r: Rexp) : String = r match {
+ − 960
case CHAR(c) => c.toString
+ − 961
case ONE => "1"
+ − 962
case ZERO => "0"
+ − 963
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 964
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 965
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 966
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 967
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 968
//case RTOP => "RTOP"
+ − 969
}
+ − 970
431
+ − 971
492
+ − 972
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 973
case Nil => {
+ − 974
if (bnullable(r)) {
+ − 975
val bits = mkepsBC(r)
+ − 976
bits
+ − 977
}
+ − 978
else
+ − 979
throw new Exception("Not matched")
+ − 980
}
+ − 981
case c::cs => {
+ − 982
val der_res = bder(c,r)
+ − 983
val simp_res = bsimp(der_res)
+ − 984
blex_simp(simp_res, cs)
+ − 985
}
431
+ − 986
}
+ − 987
+ − 988
492
+ − 989
+ − 990
+ − 991
def size(r: Rexp) : Int = r match {
+ − 992
case ZERO => 1
+ − 993
case ONE => 1
+ − 994
case CHAR(_) => 1
+ − 995
case ANYCHAR => 1
+ − 996
case NOT(r0) => 1 + size(r0)
+ − 997
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 998
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 999
case STAR(r) => 1 + size(r)
431
+ − 1000
}
+ − 1001
492
+ − 1002
def asize(a: ARexp) = size(erase(a))
431
+ − 1003
+ − 1004
//pder related
+ − 1005
type Mon = (Char, Rexp)
+ − 1006
type Lin = Set[Mon]
+ − 1007
+ − 1008
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 1009
case ZERO => Set()
+ − 1010
case ONE => rs
+ − 1011
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 1012
}
+ − 1013
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 1014
case ZERO => Set()
+ − 1015
case ONE => l
+ − 1016
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 1017
}
+ − 1018
def lf(r: Rexp): Lin = r match {
+ − 1019
case ZERO => Set()
+ − 1020
case ONE => Set()
+ − 1021
case CHAR(f) => {
+ − 1022
//val Some(c) = alphabet.find(f)
+ − 1023
Set((f, ONE))
+ − 1024
}
+ − 1025
case ALTS(r1, r2) => {
+ − 1026
lf(r1 ) ++ lf(r2)
+ − 1027
}
+ − 1028
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 1029
case SEQ(r1, r2) =>{
+ − 1030
if (nullable(r1))
+ − 1031
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 1032
else
+ − 1033
cir_prod(lf(r1), r2)
+ − 1034
}
+ − 1035
}
+ − 1036
def lfs(r: Set[Rexp]): Lin = {
+ − 1037
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 1038
}
+ − 1039
+ − 1040
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 1041
val lft = lf(t)
+ − 1042
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 1043
}
+ − 1044
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 1045
case x::xs => pders(xs, pder(x, t))
+ − 1046
case Nil => Set(t)
+ − 1047
}
+ − 1048
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 1049
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1050
case Nil => ts
+ − 1051
}
+ − 1052
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 1053
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 1054
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 1055
//because they don't include the original regular term before they are pdered.
+ − 1056
//now only pderas is fixed.
+ − 1057
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 1058
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 1059
def awidth(r: Rexp) : Int = r match {
+ − 1060
case CHAR(c) => 1
+ − 1061
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 1062
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 1063
case ONE => 0
+ − 1064
case ZERO => 0
+ − 1065
case STAR(r) => awidth(r)
+ − 1066
}
+ − 1067
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 1068
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 1069
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 1070
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 1071
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 1072
case ZERO => Set[Rexp]()
+ − 1073
case ONE => Set[Rexp]()
+ − 1074
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 1075
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 1076
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 1077
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 1078
}
+ − 1079
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 1080
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1081
case Nil => ts
+ − 1082
}
+ − 1083
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 1084
+ − 1085
+ − 1086
+ − 1087
def starPrint(r: Rexp) : Unit = r match {
+ − 1088
+ − 1089
case SEQ(head, rstar) =>
+ − 1090
println(shortRexpOutput(head) ++ "~STARREG")
+ − 1091
case STAR(rstar) =>
+ − 1092
println("STARREG")
+ − 1093
case ALTS(r1, r2) =>
+ − 1094
println("(")
+ − 1095
starPrint(r1)
+ − 1096
println("+")
+ − 1097
starPrint(r2)
+ − 1098
println(")")
+ − 1099
case ZERO => println("0")
+ − 1100
}
+ − 1101
+ − 1102
// @arg(doc = "small tests")
516
+ − 1103
def n_astar_list(d: Int) : Rexp = {
+ − 1104
if(d == 0) STAR("a")
+ − 1105
else ALTS(STAR("a" * d), n_astar_list(d - 1))
+ − 1106
}
+ − 1107
def n_astar_alts(d: Int) : Rexp = d match {
+ − 1108
case 0 => n_astar_list(0)
+ − 1109
case d => STAR(n_astar_list(d))
+ − 1110
// case r1 :: r2 :: Nil => ALTS(r1, r2)
+ − 1111
// case r1 :: Nil => r1
+ − 1112
// case r :: rs => ALTS(r, n_astar_alts(rs))
+ − 1113
// case Nil => throw new Error("should give at least 1 elem")
+ − 1114
}
+ − 1115
def n_astar_aux(d: Int) = {
+ − 1116
if(d == 0) n_astar_alts(0)
+ − 1117
else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
+ − 1118
}
+ − 1119
+ − 1120
def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
+ − 1121
//val STARREG = n_astar(3)
+ − 1122
// ( STAR("a") |
+ − 1123
// ("a" | "aa").% |
+ − 1124
// ( "a" | "aa" | "aaa").%
+ − 1125
// ).%
+ − 1126
//( "a" | "aa" | "aaa" | "aaaa").% |
+ − 1127
//( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").%
431
+ − 1128
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 1129
+ − 1130
// @main
492
+ − 1131
516
+ − 1132
def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
+ − 1133
(a, b) => b * a /
+ − 1134
Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
+ − 1135
}
492
+ − 1136
431
+ − 1137
def small() = {
516
+ − 1138
//val pderSTAR = pderUNIV(STARREG)
492
+ − 1139
516
+ − 1140
//val refSize = pderSTAR.map(size(_)).sum
518
+ − 1141
for(n <- 5 to 5){
516
+ − 1142
val STARREG = n_astar(n)
+ − 1143
val iMax = (lcm((1 to n).toList))
518
+ − 1144
for(i <- 1 to iMax + 2){// 100, 400, 800, 840, 841, 900
516
+ − 1145
val prog0 = "a" * i
+ − 1146
//println(s"test: $prog0")
518
+ − 1147
print(i)
516
+ − 1148
print(" ")
+ − 1149
// print(i)
+ − 1150
// print(" ")
+ − 1151
println(asize(bders_simp(prog0.toList, internalise(STARREG))))
+ − 1152
}
492
+ − 1153
}
431
+ − 1154
}
+ − 1155
492
+ − 1156
def generator_test() {
+ − 1157
494
+ − 1158
// test(rexp(7), 1000) { (r: Rexp) =>
+ − 1159
// val ss = stringsFromRexp(r)
+ − 1160
// val boolList = ss.map(s => {
+ − 1161
// val simpVal = simpBlexer(r, s)
+ − 1162
// val strongVal = strongBlexer(r, s)
+ − 1163
// // println(simpVal)
+ − 1164
// // println(strongVal)
+ − 1165
// (simpVal == strongVal) && (simpVal != None)
+ − 1166
// })
+ − 1167
// !boolList.exists(b => b == false)
+ − 1168
// }
+ − 1169
// test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) =>
+ − 1170
// val ss = stringsFromRexp(r)
+ − 1171
// val boolList = ss.map(s => {
+ − 1172
// val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1173
// val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1174
// // println(shortRexpOutput(r))
+ − 1175
// // println(s)
+ − 1176
// // println(strongVal)
+ − 1177
// // println(strongVal5)
+ − 1178
// // (strongVal == strongVal5)
493
+ − 1179
494
+ − 1180
// if(asize(bdStrong5) > asize(bdStrong)){
+ − 1181
// println(s)
+ − 1182
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1183
// println(shortRexpOutput(erase(bdStrong)))
+ − 1184
// }
+ − 1185
// asize(bdStrong5) <= asize(bdStrong)
+ − 1186
// })
+ − 1187
// !boolList.exists(b => b == false)
+ − 1188
// }
500
+ − 1189
//*** example where bdStrong5 has a smaller size than bdStrong
+ − 1190
// test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) =>
+ − 1191
//*** depth 5 example bdStrong5 larger size than bdStrong
+ − 1192
// test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
+ − 1193
+ − 1194
+ − 1195
+ − 1196
//sanity check from Christian's request
+ − 1197
// val r = ("a" | "ab") ~ ("bc" | "c")
+ − 1198
// val a = internalise(r)
+ − 1199
// val aval = blexing_simp(r, "abc")
+ − 1200
// println(aval)
+ − 1201
+ − 1202
//sample counterexample:(depth 7)
+ − 1203
//STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
+ − 1204
//(depth5)
+ − 1205
//STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
518
+ − 1206
test(rexp(4), 100000) { (r: Rexp) =>
500
+ − 1207
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
493
+ − 1208
val ss = stringsFromRexp(r)
518
+ − 1209
val boolList = ss.filter(s => s != "").map(s => {
+ − 1210
//val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1211
val bdStrong6 = bdersStrong6(s.toList, internalise(r))
+ − 1212
val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
+ − 1213
val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r))
494
+ − 1214
// println(s)
518
+ − 1215
// println(bdStrong6Set.size, pdersSet.size)
+ − 1216
bdStrong6Set.size <= pdersSet.size
493
+ − 1217
})
494
+ − 1218
// println(boolList)
500
+ − 1219
//!boolList.exists(b => b == false)
493
+ − 1220
!boolList.exists(b => b == false)
492
+ − 1221
}
493
+ − 1222
+ − 1223
492
+ − 1224
}
518
+ − 1225
// small()
+ − 1226
generator_test()
493
+ − 1227
518
+ − 1228
def counterexample_check() {
+ − 1229
val r = STAR(SEQ(ALTS(ALTS(CHAR('b'),CHAR('c')),
+ − 1230
SEQ(CHAR('b'),CHAR('b'))),ALTS(SEQ(ONE,CHAR('b')),CHAR('a'))))
+ − 1231
val s = "bbbb"
+ − 1232
val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1233
val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1234
val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1235
println("original regex ")
+ − 1236
rprint(r)
+ − 1237
println("after strong bsimp")
+ − 1238
aprint(bdStrong5)
+ − 1239
println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ")
+ − 1240
rsprint(bdStrong5Set)
+ − 1241
println("after pderUNIV")
+ − 1242
rsprint(pdersSet.toList)
+ − 1243
}
+ − 1244
// counterexample_check()
516
+ − 1245
// 1
518
+ − 1246
def newStrong_test() {
+ − 1247
val r2 = (CHAR('b') | ONE)
+ − 1248
val r0 = CHAR('d')
+ − 1249
val r1 = (ONE | CHAR('c'))
+ − 1250
val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
+ − 1251
println(s"original regex is: ")
+ − 1252
rprint(expRexp)
+ − 1253
val expSimp5 = strongBsimp5(internalise(expRexp))
+ − 1254
val expSimp6 = strongBsimp6(internalise(expRexp))
+ − 1255
aprint(expSimp5)
+ − 1256
aprint(expSimp6)
+ − 1257
}
+ − 1258
// newStrong_test()
516
+ − 1259
// SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1260
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1261
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1262
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
493
+ − 1263
+ − 1264
+ − 1265
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1266
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))