518
+ − 1
//Strong Bsimp to obtain Antimirov's cubic bound
+ − 2
431
+ − 3
// A simple lexer inspired by work of Sulzmann & Lu
+ − 4
//==================================================
+ − 5
//
+ − 6
// Call the test cases with
+ − 7
//
+ − 8
// amm lexer.sc small
+ − 9
// amm lexer.sc fib
+ − 10
// amm lexer.sc loops
+ − 11
// amm lexer.sc email
+ − 12
//
+ − 13
// amm lexer.sc all
+ − 14
514
+ − 15
431
+ − 16
+ − 17
// regular expressions including records
+ − 18
abstract class Rexp
+ − 19
case object ZERO extends Rexp
+ − 20
case object ONE extends Rexp
+ − 21
case object ANYCHAR extends Rexp
+ − 22
case class CHAR(c: Char) extends Rexp
+ − 23
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
539
+ − 24
case class ALTSS(rs: List[Rexp]) extends Rexp
431
+ − 25
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 26
case class STAR(r: Rexp) extends Rexp
+ − 27
case class RECD(x: String, r: Rexp) extends Rexp
+ − 28
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 29
case class OPTIONAL(r: Rexp) extends Rexp
+ − 30
case class NOT(r: Rexp) extends Rexp
591
+ − 31
// records for extracting strings or tokens
+ − 32
431
+ − 33
// values
+ − 34
abstract class Val
492
+ − 35
case object Failure extends Val
431
+ − 36
case object Empty extends Val
+ − 37
case class Chr(c: Char) extends Val
+ − 38
case class Sequ(v1: Val, v2: Val) extends Val
+ − 39
case class Left(v: Val) extends Val
+ − 40
case class Right(v: Val) extends Val
+ − 41
case class Stars(vs: List[Val]) extends Val
+ − 42
case class Rec(x: String, v: Val) extends Val
+ − 43
case class Ntime(vs: List[Val]) extends Val
+ − 44
case class Optionall(v: Val) extends Val
+ − 45
case class Nots(s: String) extends Val
+ − 46
+ − 47
+ − 48
+ − 49
abstract class Bit
+ − 50
case object Z extends Bit
+ − 51
case object S extends Bit
+ − 52
+ − 53
+ − 54
type Bits = List[Bit]
+ − 55
+ − 56
abstract class ARexp
+ − 57
case object AZERO extends ARexp
+ − 58
case class AONE(bs: Bits) extends ARexp
+ − 59
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 60
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 61
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 62
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 63
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 64
case class AANYCHAR(bs: Bits) extends ARexp
+ − 65
514
+ − 66
import scala.util.Try
+ − 67
492
+ − 68
trait Generator[+T] {
591
+ − 69
self => // an alias for "this"
492
+ − 70
def generate(): T
591
+ − 71
492
+ − 72
def gen(n: Int) : List[T] =
+ − 73
if (n == 0) Nil else self.generate() :: gen(n - 1)
591
+ − 74
492
+ − 75
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 76
def generate = f(self.generate())
+ − 77
}
+ − 78
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 79
def generate = f(self.generate()).generate()
+ − 80
}
+ − 81
+ − 82
+ − 83
}
+ − 84
591
+ − 85
// tests a property according to a given random generator
+ − 86
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 87
for (_ <- 0 until amount) {
+ − 88
val value = r.generate()
+ − 89
assert(pred(value), s"Test failed for: $value")
492
+ − 90
}
591
+ − 91
println(s"Test passed $amount times")
+ − 92
}
+ − 93
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 94
for (_ <- 0 until amount) {
+ − 95
val valueR = r.generate()
+ − 96
val valueS = s.generate()
+ − 97
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
492
+ − 98
}
591
+ − 99
println(s"Test passed $amount times")
+ − 100
}
492
+ − 101
+ − 102
// random integers
+ − 103
val integers = new Generator[Int] {
+ − 104
val rand = new java.util.Random
+ − 105
def generate() = rand.nextInt()
+ − 106
}
+ − 107
+ − 108
// random booleans
+ − 109
val booleans = integers.map(_ > 0)
591
+ − 110
492
+ − 111
// random integers in the range lo and high
+ − 112
def range(lo: Int, hi: Int): Generator[Int] =
+ − 113
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 114
+ − 115
// random characters
+ − 116
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 117
val chars = chars_range('a', 'z')
+ − 118
+ − 119
+ − 120
def oneOf[T](xs: T*): Generator[T] =
+ − 121
for (idx <- range(0, xs.length)) yield xs(idx)
591
+ − 122
492
+ − 123
def single[T](x: T) = new Generator[T] {
+ − 124
def generate() = x
+ − 125
}
+ − 126
+ − 127
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 128
for (x <- t; y <- u) yield (x, y)
+ − 129
+ − 130
// lists
+ − 131
def emptyLists = single(Nil)
+ − 132
+ − 133
def nonEmptyLists : Generator[List[Int]] =
+ − 134
for (head <- integers; tail <- lists) yield head :: tail
+ − 135
+ − 136
def lists: Generator[List[Int]] = for {
+ − 137
kind <- booleans
+ − 138
list <- if (kind) emptyLists else nonEmptyLists
+ − 139
} yield list
+ − 140
+ − 141
def char_list(len: Int): Generator[List[Char]] = {
+ − 142
if(len <= 0) single(Nil)
+ − 143
else{
+ − 144
for {
500
+ − 145
c <- chars_range('a', 'c')
492
+ − 146
tail <- char_list(len - 1)
+ − 147
} yield c :: tail
+ − 148
}
+ − 149
}
+ − 150
+ − 151
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 152
493
+ − 153
def sampleString(r: Rexp) : List[String] = r match {
+ − 154
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 155
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 156
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 157
case ONE => "" :: Nil
+ − 158
case ZERO => Nil
+ − 159
case CHAR(c) => c.toString :: Nil
+ − 160
+ − 161
}
+ − 162
+ − 163
def stringsFromRexp(r: Rexp) : List[String] =
+ − 164
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 165
492
+ − 166
+ − 167
// (simple) binary trees
+ − 168
trait Tree[T]
+ − 169
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 170
case class Leaf[T](x: T) extends Tree[T]
+ − 171
+ − 172
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 173
for (x <- t) yield Leaf(x)
+ − 174
+ − 175
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 176
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 177
+ − 178
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 179
for (kind <- range(0, 2);
+ − 180
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 181
+ − 182
// regular expressions
+ − 183
+ − 184
// generates random leaf-regexes; prefers CHAR-regexes
+ − 185
def leaf_rexp() : Generator[Rexp] =
+ − 186
for (kind <- range(0, 5);
+ − 187
c <- chars_range('a', 'd')) yield
591
+ − 188
kind match {
+ − 189
case 0 => ZERO
+ − 190
case 1 => ONE
+ − 191
case _ => CHAR(c)
+ − 192
}
492
+ − 193
+ − 194
// generates random inner regexes with maximum depth d
+ − 195
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 196
for (kind <- range(0, 3);
+ − 197
l <- rexp(d);
+ − 198
r <- rexp(d))
591
+ − 199
yield kind match {
+ − 200
case 0 => ALTS(l, r)
+ − 201
case 1 => SEQ(l, r)
+ − 202
case 2 => STAR(r)
+ − 203
}
492
+ − 204
+ − 205
// generates random regexes with maximum depth d;
+ − 206
// prefers inner regexes in 2/3 of the cases
+ − 207
def rexp(d: Int = 100): Generator[Rexp] =
+ − 208
for (kind <- range(0, 3);
+ − 209
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 210
+ − 211
+ − 212
// some test functions for rexps
+ − 213
def height(r: Rexp) : Int = r match {
+ − 214
case ZERO => 1
+ − 215
case ONE => 1
+ − 216
case CHAR(_) => 1
+ − 217
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 218
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 219
case STAR(r) => 1 + height(r)
+ − 220
}
+ − 221
+ − 222
// def size(r: Rexp) : Int = r match {
+ − 223
// case ZERO => 1
+ − 224
// case ONE => 1
+ − 225
// case CHAR(_) => 1
+ − 226
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 227
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 228
// case STAR(r) => 1 + size(r)
+ − 229
// }
+ − 230
+ − 231
// randomly subtracts 1 or 2 from the STAR case
+ − 232
def size_faulty(r: Rexp) : Int = r match {
+ − 233
case ZERO => 1
+ − 234
case ONE => 1
+ − 235
case CHAR(_) => 1
+ − 236
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 237
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 238
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 239
}
+ − 240
431
+ − 241
591
+ − 242
431
+ − 243
// some convenience for typing in regular expressions
+ − 244
+ − 245
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 246
case Nil => ONE
+ − 247
case c::Nil => CHAR(c)
+ − 248
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 249
}
+ − 250
implicit def string2rexp(s : String) : Rexp =
+ − 251
charlist2rexp(s.toList)
+ − 252
591
+ − 253
implicit def RexpOps(r: Rexp) = new {
+ − 254
def | (s: Rexp) = ALTS(r, s)
+ − 255
def % = STAR(r)
+ − 256
def ~ (s: Rexp) = SEQ(r, s)
+ − 257
}
431
+ − 258
591
+ − 259
implicit def stringOps(s: String) = new {
+ − 260
def | (r: Rexp) = ALTS(s, r)
+ − 261
def | (r: String) = ALTS(s, r)
+ − 262
def % = STAR(s)
+ − 263
def ~ (r: Rexp) = SEQ(s, r)
+ − 264
def ~ (r: String) = SEQ(s, r)
+ − 265
def $ (r: Rexp) = RECD(s, r)
+ − 266
}
431
+ − 267
591
+ − 268
def nullable(r: Rexp) : Boolean = r match {
+ − 269
case ZERO => false
+ − 270
case ONE => true
+ − 271
case CHAR(_) => false
+ − 272
case ANYCHAR => false
+ − 273
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 274
case ALTSS(rs) => rs.exists(nullable)
+ − 275
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 276
case STAR(_) => true
+ − 277
case RECD(_, r1) => nullable(r1)
+ − 278
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 279
case OPTIONAL(r) => true
+ − 280
case NOT(r) => !nullable(r)
+ − 281
}
431
+ − 282
591
+ − 283
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 284
case ZERO => ZERO
+ − 285
case ONE => ZERO
+ − 286
case CHAR(d) => if (c == d) ONE else ZERO
+ − 287
case ANYCHAR => ONE
+ − 288
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 289
case ALTSS(rs) => ALTSS(rs.map(der(c, _)))
+ − 290
case SEQ(r1, r2) =>
+ − 291
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 292
else SEQ(der(c, r1), r2)
+ − 293
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 294
case RECD(_, r1) => der(c, r1)
+ − 295
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 296
case OPTIONAL(r) => der(c, r)
+ − 297
case NOT(r) => NOT(der(c, r))
+ − 298
}
431
+ − 299
591
+ − 300
def ders(s: List[Char], r: Rexp) : Rexp = s match {
+ − 301
case Nil => r
+ − 302
case c :: cs => ders(cs, der(c, r))
+ − 303
}
539
+ − 304
591
+ − 305
def ders_simp(s: List[Char], r: Rexp) : Rexp = s match {
+ − 306
case Nil => simp(r)
+ − 307
case c :: cs => ders_simp(cs, simp(der(c, r)))
+ − 308
}
539
+ − 309
+ − 310
591
+ − 311
def simp2(r: Rexp) : Rexp = r match {
+ − 312
case SEQ(r1, r2) =>
+ − 313
(simp2(r1), simp2(r2)) match {
+ − 314
case (ZERO, _) => ZERO
+ − 315
case (_, ZERO) => ZERO
+ − 316
case (ONE, r2s) => r2s
+ − 317
case (r1s, ONE) => r1s
+ − 318
case (r1s, r2s) =>
+ − 319
SEQ(r1s, r2s)
+ − 320
}
+ − 321
case ALTS(r1, r2) =>
+ − 322
val r1r2s = fltsplain(simp2(r1) :: simp2(r2) :: Nil).distinct
+ − 323
r1r2s match {
+ − 324
case Nil => ZERO
+ − 325
case r :: Nil => r
+ − 326
case r1 :: r2 :: rs =>
+ − 327
ALTSS(r1 :: r2 :: rs)
+ − 328
}
+ − 329
case ALTSS(rs) =>
+ − 330
// println(rs)
+ − 331
(fltsplain(rs.map(simp2))).distinct match {
+ − 332
case Nil => ZERO
+ − 333
case r :: Nil => r
+ − 334
case r1 :: r2 :: rs =>
+ − 335
ALTSS(r1 :: r2 :: rs)
+ − 336
}
+ − 337
case r => r
+ − 338
}
539
+ − 339
+ − 340
591
+ − 341
def simp(r: Rexp) : Rexp = r match {
+ − 342
case SEQ(r1, r2) =>
+ − 343
(simp(r1), simp(r2)) match {
+ − 344
case (ZERO, _) => ZERO
+ − 345
case (_, ZERO) => ZERO
+ − 346
case (ONE, r2s) => r2s
+ − 347
case (r1s, ONE) => r1s
+ − 348
case (r1s, r2s) => SEQ(r1s, r2s)
+ − 349
}
+ − 350
case ALTS(r1, r2) => {
+ − 351
(simp(r1), simp(r2)) match {
+ − 352
case (ZERO, r2s) => r2s
+ − 353
case (r1s, ZERO) => r1s
+ − 354
case (r1s, r2s) =>
+ − 355
if(r1s == r2s) r1s else ALTS(r1s, r2s)
+ − 356
}
+ − 357
}
+ − 358
case r => r
539
+ − 359
}
+ − 360
591
+ − 361
def fltsplain(rs: List[Rexp]) : List[Rexp] = rs match {
+ − 362
case Nil => Nil
+ − 363
case ZERO :: rs => fltsplain(rs)
+ − 364
case ALTS(r1, r2) :: rs => r1 :: r2 :: fltsplain(rs)
+ − 365
case ALTSS(rs) :: rs1 => rs ::: fltsplain(rs1)
+ − 366
case r :: rs => r :: fltsplain(rs)
+ − 367
}
539
+ − 368
+ − 369
+ − 370
+ − 371
591
+ − 372
def matcher(s: String, r: Rexp) : Boolean =
+ − 373
nullable(ders(s.toList, r))
539
+ − 374
591
+ − 375
def simp_matcher(s: String, r: Rexp) : Boolean =
+ − 376
nullable(ders_simp(s.toList, r))
539
+ − 377
431
+ − 378
591
+ − 379
// extracts a string from a value
+ − 380
def flatten(v: Val) : String = v match {
+ − 381
case Empty => ""
+ − 382
case Chr(c) => c.toString
+ − 383
case Left(v) => flatten(v)
+ − 384
case Right(v) => flatten(v)
+ − 385
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 386
case Stars(vs) => vs.map(flatten).mkString
+ − 387
case Ntime(vs) => vs.map(flatten).mkString
+ − 388
case Optionall(v) => flatten(v)
+ − 389
case Rec(_, v) => flatten(v)
+ − 390
}
431
+ − 391
+ − 392
591
+ − 393
// extracts an environment from a value;
+ − 394
// used for tokenising a string
+ − 395
def env(v: Val) : List[(String, String)] = v match {
+ − 396
case Empty => Nil
+ − 397
case Chr(c) => Nil
+ − 398
case Left(v) => env(v)
+ − 399
case Right(v) => env(v)
+ − 400
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 401
case Stars(vs) => vs.flatMap(env)
+ − 402
case Ntime(vs) => vs.flatMap(env)
+ − 403
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 404
case Optionall(v) => env(v)
+ − 405
case Nots(s) => ("Negative", s) :: Nil
+ − 406
}
431
+ − 407
+ − 408
591
+ − 409
// The injection and mkeps part of the lexer
+ − 410
//===========================================
431
+ − 411
591
+ − 412
def mkeps(r: Rexp) : Val = r match {
+ − 413
case ONE => Empty
+ − 414
case ALTS(r1, r2) =>
+ − 415
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 416
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 417
case STAR(r) => Stars(Nil)
+ − 418
case RECD(x, r) => Rec(x, mkeps(r))
+ − 419
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 420
case OPTIONAL(r) => Optionall(Empty)
+ − 421
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 422
else Nots("")//Nots(s.reverse.toString)
+ − 423
// case NOT(ZERO) => Empty
+ − 424
// case NOT(CHAR(c)) => Empty
+ − 425
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 426
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 427
// case NOT(STAR(r)) => Stars(Nil)
431
+ − 428
591
+ − 429
}
431
+ − 430
591
+ − 431
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 432
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 433
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 434
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 435
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 436
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 437
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 438
case (CHAR(d), Empty) => Chr(c)
+ − 439
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 440
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 441
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 442
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 443
case (ANYCHAR, Empty) => Chr(c)
+ − 444
}
431
+ − 445
591
+ − 446
// some "rectification" functions for simplification
431
+ − 447
+ − 448
+ − 449
+ − 450
591
+ − 451
// The Lexing Rules for the WHILE Language
431
+ − 452
+ − 453
// bnullable function: tests whether the aregular
+ − 454
// expression can recognise the empty string
591
+ − 455
def bnullable (r: ARexp) : Boolean = r match {
431
+ − 456
case AZERO => false
+ − 457
case AONE(_) => true
+ − 458
case ACHAR(_,_) => false
+ − 459
case AALTS(_, rs) => rs.exists(bnullable)
+ − 460
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 461
case ASTAR(_, _) => true
+ − 462
case ANOT(_, rn) => !bnullable(rn)
+ − 463
}
+ − 464
591
+ − 465
def mkepsBC(r: ARexp) : Bits = r match {
431
+ − 466
case AONE(bs) => bs
+ − 467
case AALTS(bs, rs) => {
+ − 468
val n = rs.indexWhere(bnullable)
+ − 469
bs ++ mkepsBC(rs(n))
+ − 470
}
+ − 471
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 472
case ASTAR(bs, r) => bs ++ List(Z)
+ − 473
case ANOT(bs, rn) => bs
+ − 474
}
+ − 475
+ − 476
591
+ − 477
def bder(c: Char, r: ARexp) : ARexp = r match {
431
+ − 478
case AZERO => AZERO
+ − 479
case AONE(_) => AZERO
+ − 480
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 481
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 482
case ASEQ(bs, r1, r2) =>
+ − 483
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 484
else ASEQ(bs, bder(c, r1), r2)
+ − 485
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 486
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 487
case AANYCHAR(bs) => AONE(bs)
+ − 488
}
+ − 489
591
+ − 490
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
431
+ − 491
case AZERO => AZERO
+ − 492
case AONE(cs) => AONE(bs ++ cs)
+ − 493
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 494
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 495
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 496
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 497
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 498
}
+ − 499
+ − 500
591
+ − 501
def internalise(r: Rexp) : ARexp = r match {
431
+ − 502
case ZERO => AZERO
+ − 503
case ONE => AONE(Nil)
+ − 504
case CHAR(c) => ACHAR(Nil, c)
+ − 505
//case PRED(f) => APRED(Nil, f)
+ − 506
case ALTS(r1, r2) =>
+ − 507
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
591
+ − 508
// case ALTS(r1::rs) => {
+ − 509
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 510
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 511
// }
431
+ − 512
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 513
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 514
case RECD(x, r) => internalise(r)
+ − 515
case NOT(r) => ANOT(Nil, internalise(r))
+ − 516
case ANYCHAR => AANYCHAR(Nil)
+ − 517
}
+ − 518
+ − 519
591
+ − 520
def bsimp(r: ARexp): ARexp =
431
+ − 521
{
+ − 522
r match {
+ − 523
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
492
+ − 524
case (AZERO, _) => AZERO
+ − 525
case (_, AZERO) => AZERO
+ − 526
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 527
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 528
}
+ − 529
case AALTS(bs1, rs) => {
+ − 530
val rs_simp = rs.map(bsimp(_))
+ − 531
val flat_res = flats(rs_simp)
+ − 532
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 533
dist_res match {
+ − 534
case Nil => AZERO
+ − 535
case s :: Nil => fuse(bs1, s)
+ − 536
case rs => AALTS(bs1, rs)
+ − 537
}
+ − 538
+ − 539
}
+ − 540
case r => r
431
+ − 541
}
591
+ − 542
}
+ − 543
def strongBsimp(r: ARexp): ARexp =
+ − 544
{
+ − 545
r match {
+ − 546
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 547
case (AZERO, _) => AZERO
+ − 548
case (_, AZERO) => AZERO
+ − 549
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 550
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 551
}
+ − 552
case AALTS(bs1, rs) => {
492
+ − 553
val rs_simp = rs.map(strongBsimp(_))
+ − 554
val flat_res = flats(rs_simp)
+ − 555
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 556
dist_res match {
+ − 557
case Nil => AZERO
+ − 558
case s :: Nil => fuse(bs1, s)
+ − 559
case rs => AALTS(bs1, rs)
+ − 560
}
591
+ − 561
+ − 562
}
+ − 563
case r => r
492
+ − 564
}
431
+ − 565
}
+ − 566
591
+ − 567
def strongBsimp5(r: ARexp): ARexp =
+ − 568
{
+ − 569
// println("was this called?")
+ − 570
r match {
+ − 571
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
494
+ − 572
case (AZERO, _) => AZERO
+ − 573
case (_, AZERO) => AZERO
+ − 574
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 575
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 576
}
+ − 577
case AALTS(bs1, rs) => {
+ − 578
// println("alts case")
494
+ − 579
val rs_simp = rs.map(strongBsimp5(_))
+ − 580
val flat_res = flats(rs_simp)
500
+ − 581
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
518
+ − 582
// var dist2_res = distinctBy5(dist_res)
+ − 583
// while(dist_res != dist2_res){
+ − 584
// dist_res = dist2_res
+ − 585
// dist2_res = distinctBy5(dist_res)
+ − 586
// }
+ − 587
(dist_res) match {
+ − 588
case Nil => AZERO
+ − 589
case s :: Nil => fuse(bs1, s)
+ − 590
case rs => AALTS(bs1, rs)
500
+ − 591
}
591
+ − 592
}
+ − 593
case r => r
518
+ − 594
}
+ − 595
}
+ − 596
591
+ − 597
def strongBsimp6(r: ARexp): ARexp =
+ − 598
{
+ − 599
// println("was this called?")
+ − 600
r match {
+ − 601
case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
518
+ − 602
case (AZERO, _) => AZERO
+ − 603
case (_, AZERO) => AZERO
+ − 604
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
530
+ − 605
case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
532
+ − 606
//case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
518
+ − 607
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 608
}
+ − 609
case AALTS(bs1, rs) => {
+ − 610
// println("alts case")
518
+ − 611
val rs_simp = rs.map(strongBsimp6(_))
+ − 612
val flat_res = flats(rs_simp)
+ − 613
var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
+ − 614
(dist_res) match {
494
+ − 615
case Nil => AZERO
+ − 616
case s :: Nil => fuse(bs1, s)
+ − 617
case rs => AALTS(bs1, rs)
+ − 618
}
591
+ − 619
}
+ − 620
//(erase(r0))) => AONE(bs)
+ − 621
case r => r
494
+ − 622
}
532
+ − 623
}
+ − 624
591
+ − 625
+ − 626
def atMostEmpty(r: Rexp) : Boolean = r match {
+ − 627
case ZERO => true
+ − 628
case ONE => true
+ − 629
case STAR(r) => atMostEmpty(r)
+ − 630
case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+ − 631
case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+ − 632
case CHAR(_) => false
+ − 633
}
+ − 634
533
+ − 635
591
+ − 636
def isOne(r: Rexp) : Boolean = r match {
+ − 637
case ONE => true
+ − 638
case SEQ(r1, r2) => isOne(r1) && isOne(r2)
+ − 639
case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
+ − 640
case STAR(r0) => atMostEmpty(r0)
+ − 641
case CHAR(c) => false
+ − 642
case ZERO => false
+ − 643
}
+ − 644
+ − 645
def distinctWith(rs: List[ARexp],
+ − 646
pruneFunction: (ARexp, Set[Rexp]) => ARexp,
+ − 647
acc: Set[Rexp] = Set()) : List[ARexp] =
+ − 648
rs match{
+ − 649
case Nil => Nil
+ − 650
case r :: rs =>
+ − 651
if(acc(erase(r)))
+ − 652
distinctWith(rs, pruneFunction, acc)
+ − 653
else {
+ − 654
val pruned_r = pruneFunction(r, acc)
+ − 655
pruned_r ::
+ − 656
distinctWith(rs,
+ − 657
pruneFunction,
+ − 658
turnIntoTerms(erase(pruned_r)) ++: acc
+ − 659
)
+ − 660
}
+ − 661
}
532
+ − 662
591
+ − 663
//r = r' ~ tail' : If tail' matches tail => returns r'
+ − 664
def removeSeqTail(r: Rexp, tail: Rexp) : Rexp = r match {
+ − 665
case SEQ(r1, r2) =>
+ − 666
if(r2 == tail)
+ − 667
r1
+ − 668
else
+ − 669
ZERO
+ − 670
case r => ZERO
+ − 671
}
532
+ − 672
591
+ − 673
def prune(r: ARexp, acc: Set[Rexp]) : ARexp = r match{
+ − 674
case AALTS(bs, rs) => rs.map(r => prune(r, acc)).filter(_ != ZERO) match
+ − 675
{
+ − 676
//all components have been removed, meaning this is effectively a duplicate
+ − 677
//flats will take care of removing this AZERO
+ − 678
case Nil => AZERO
+ − 679
case r::Nil => fuse(bs, r)
+ − 680
case rs1 => AALTS(bs, rs1)
+ − 681
}
+ − 682
case ASEQ(bs, r1, r2) =>
+ − 683
//remove the r2 in (ra + rb)r2 to identify the duplicate contents of r1
+ − 684
prune(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match {
+ − 685
//after pruning, returns 0
+ − 686
case AZERO => AZERO
+ − 687
//after pruning, got r1'.r2, where r1' is equal to 1
+ − 688
case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2)
+ − 689
//assemble the pruned head r1p with r2
+ − 690
case r1p => ASEQ(bs, r1p, r2)
+ − 691
}
+ − 692
//this does the duplicate component removal task
+ − 693
case r => if(acc(erase(r))) AZERO else r
+ − 694
}
+ − 695
+ − 696
//a stronger version of simp
+ − 697
def bsimpStrong(r: ARexp): ARexp =
+ − 698
{
+ − 699
r match {
+ − 700
case ASEQ(bs1, r1, r2) => (bsimpStrong(r1), bsimpStrong(r2)) match {
+ − 701
//normal clauses same as simp
532
+ − 702
case (AZERO, _) => AZERO
+ − 703
case (_, AZERO) => AZERO
+ − 704
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
591
+ − 705
//bs2 can be discarded
+ − 706
case (r1s, AONE(bs2)) => fuse(bs1, r1s) //assert bs2 == Nil
532
+ − 707
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 708
}
+ − 709
case AALTS(bs1, rs) => {
+ − 710
//distinctBy(flat_res, erase)
+ − 711
distinctWith(flats(rs.map(bsimpStrong(_))), prune) match {
532
+ − 712
case Nil => AZERO
+ − 713
case s :: Nil => fuse(bs1, s)
+ − 714
case rs => AALTS(bs1, rs)
+ − 715
}
591
+ − 716
}
+ − 717
//stars that can be treated as 1
+ − 718
case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
+ − 719
case r => r
+ − 720
}
532
+ − 721
}
431
+ − 722
+ − 723
591
+ − 724
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 725
case Nil => r
+ − 726
case c::s => bders(s, bder(c, r))
+ − 727
}
492
+ − 728
591
+ − 729
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 730
case Nil => Nil
+ − 731
case AZERO :: rs1 => flats(rs1)
+ − 732
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 733
case r1 :: rs2 => r1 :: flats(rs2)
+ − 734
}
+ − 735
+ − 736
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 737
case Nil => Nil
+ − 738
case (x::xs) => {
+ − 739
val res = f(x)
+ − 740
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 741
else x::distinctBy(xs, f, res::acc)
492
+ − 742
}
591
+ − 743
}
431
+ − 744
+ − 745
591
+ − 746
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 747
r match {
+ − 748
case ASEQ(bs, r1, r2) =>
+ − 749
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 750
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 751
})
+ − 752
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 753
pruned match {
+ − 754
case AZERO => AZERO
+ − 755
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 756
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 757
}
+ − 758
+ − 759
case AALTS(bs, rs) =>
+ − 760
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 761
val rsp = rs.map(r =>
+ − 762
pruneRexp(r, allowableTerms)
+ − 763
)
+ − 764
.filter(r => r != AZERO)
+ − 765
rsp match {
+ − 766
case Nil => AZERO
+ − 767
case r1::Nil => fuse(bs, r1)
+ − 768
case rs1 => AALTS(bs, rs1)
+ − 769
}
+ − 770
case r =>
+ − 771
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
492
+ − 772
}
431
+ − 773
}
591
+ − 774
+ − 775
def oneSimp(r: Rexp) : Rexp = r match {
+ − 776
case SEQ(ONE, r) => r
+ − 777
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 778
case r => r//assert r != 0
+ − 779
+ − 780
}
+ − 781
492
+ − 782
591
+ − 783
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 784
case Nil => Nil
+ − 785
case x :: xs =>
+ − 786
//assert(acc.distinct == acc)
+ − 787
val erased = erase(x)
+ − 788
if(acc.contains(erased))
+ − 789
distinctBy4(xs, acc)
+ − 790
else{
+ − 791
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 792
//val xp = pruneRexp(x, addToAcc)
+ − 793
pruneRexp(x, addToAcc) match {
+ − 794
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 795
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 796
}
+ − 797
}
+ − 798
}
494
+ − 799
591
+ − 800
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 801
// where
+ − 802
// "pAKC_aux rsa r ctx = (if (seqFold (SEQ (erase r) ( ctx) )) ⊆ (seqFold (erase (AALTs [] rsa))) then AZERO else
+ − 803
// case r of (ASEQ bs r1 r2) ⇒
+ − 804
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 805
// (AALTs bs rs) ⇒
+ − 806
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 807
// r ⇒ r
+ − 808
+ − 809
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 810
// case r::Nil => SEQ(r, acc)
+ − 811
// case Nil => acc
+ − 812
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 813
// }
494
+ − 814
+ − 815
591
+ − 816
//the "fake" Language interpretation: just concatenates!
+ − 817
def seqFold(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 818
case Nil => acc
+ − 819
case r :: rs1 =>
+ − 820
// if(acc == ONE)
+ − 821
// seqFold(r, rs1)
+ − 822
// else
+ − 823
seqFold(SEQ(acc, r), rs1)
+ − 824
}
494
+ − 825
591
+ − 826
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
+ − 827
def rsprint(rs: Iterable[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
500
+ − 828
591
+ − 829
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 830
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
500
+ − 831
591
+ − 832
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 833
// println("pakc")
+ − 834
// println(shortRexpOutput(erase(r)))
+ − 835
// println("acc")
+ − 836
// rsprint(acc)
+ − 837
// println("ctx---------")
+ − 838
// rsprint(ctx)
+ − 839
// println("ctx---------end")
+ − 840
// rsprint(breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp))
+ − 841
+ − 842
if (breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
+ − 843
AZERO
494
+ − 844
}
591
+ − 845
else{
+ − 846
r match {
+ − 847
case ASEQ(bs, r1, r2) =>
+ − 848
(pAKC(acc, r1, erase(r2) :: ctx)) match{
+ − 849
case AZERO =>
+ − 850
AZERO
+ − 851
case AONE(bs1) =>
+ − 852
fuse(bs1, r2)
+ − 853
case r1p =>
+ − 854
ASEQ(bs, r1p, r2)
+ − 855
}
+ − 856
case AALTS(bs, rs0) =>
+ − 857
// println("before pruning")
+ − 858
// println(s"ctx is ")
+ − 859
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 860
// println(s"rs0 is ")
+ − 861
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 862
// println(s"acc is ")
+ − 863
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 864
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
+ − 865
case Nil =>
+ − 866
// println("after pruning Nil")
+ − 867
AZERO
+ − 868
case r :: Nil =>
+ − 869
// println("after pruning singleton")
+ − 870
// println(shortRexpOutput(erase(r)))
+ − 871
r
+ − 872
case rs0p =>
+ − 873
// println("after pruning non-singleton")
+ − 874
AALTS(bs, rs0p)
+ − 875
}
+ − 876
case r => r
494
+ − 877
}
591
+ − 878
}
494
+ − 879
}
+ − 880
591
+ − 881
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 882
case Nil =>
+ − 883
Nil
+ − 884
case x :: xs => {
+ − 885
val erased = erase(x)
+ − 886
if(acc.contains(erased)){
494
+ − 887
distinctBy5(xs, acc)
591
+ − 888
}
+ − 889
else{
+ − 890
val pruned = pAKC(acc, x, Nil)
+ − 891
val newTerms = breakIntoTerms(erase(pruned))
+ − 892
pruned match {
+ − 893
case AZERO =>
+ − 894
distinctBy5(xs, acc)
+ − 895
case xPrime =>
+ − 896
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 897
}
+ − 898
}
494
+ − 899
}
+ − 900
}
431
+ − 901
+ − 902
591
+ − 903
def isOne1(r: Rexp) : Boolean = r match {
+ − 904
case ONE => true
+ − 905
case SEQ(r1, r2) => false//isOne1(r1) && isOne1(r2)
+ − 906
case ALTS(r1, r2) => false//(isOne1(r1) || isOne1(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
+ − 907
case STAR(r0) => false//atMostEmpty(r0)
+ − 908
case CHAR(c) => false
+ − 909
case ZERO => false
+ − 910
+ − 911
}
+ − 912
+ − 913
def turnIntoTerms(r: Rexp): List[Rexp] = r match {
+ − 914
case SEQ(r1, r2) => if(isOne1(r1))
+ − 915
turnIntoTerms(r2)
+ − 916
else
+ − 917
turnIntoTerms(r1).flatMap(r11 => furtherSEQ(r11, r2))
+ − 918
case ALTS(r1, r2) => turnIntoTerms(r1) ::: turnIntoTerms(r2)
+ − 919
case ZERO => Nil
+ − 920
case _ => r :: Nil
+ − 921
}
+ − 922
+ − 923
def furtherSEQ(r11: Rexp, r2: Rexp) : List[Rexp] = {
+ − 924
if(r11 == ONE)
+ − 925
turnIntoTerms(r2)
+ − 926
else
+ − 927
SEQ(r11, r2) :: Nil
+ − 928
}
+ − 929
+ − 930
def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
+ − 931
turnIntoTerms((seqFold(erase(r), ctx)))
+ − 932
.toSet
+ − 933
}
+ − 934
+ − 935
def attachCtxcc(r: Rexp, ctx: List[Rexp]) : Set[Rexp] =
+ − 936
turnIntoTerms(seqFold(r, ctx)).toSet
+ − 937
+ − 938
def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C,
+ − 939
subseteqPred: (C, C) => Boolean) : Boolean = {
+ − 940
subseteqPred(f(a, b), c)
+ − 941
}
+ − 942
def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean =
+ − 943
//rs1 \subseteq rs2
+ − 944
rs1.forall(rs2.contains)
+ − 945
+ − 946
def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp] = Nil) : ARexp = {
+ − 947
if (ABIncludedByC(a = r, b = ctx, c = acc,
+ − 948
f = attachCtx, subseteqPred = rs1_subseteq_rs2))
+ − 949
{
+ − 950
AZERO
+ − 951
}
+ − 952
else{
+ − 953
r match {
+ − 954
case ASEQ(bs, r1, r2) => (prune6(acc, r1, erase(r2) :: ctx)) match{
+ − 955
case AZERO =>
+ − 956
AZERO
+ − 957
case AONE(bs1) => //when r1 becomes 1, chances to further prune r2
+ − 958
prune6(acc, fuse(bs1, r2), ctx)
+ − 959
case r1p =>
+ − 960
ASEQ(bs, r1p, r2)
+ − 961
}
+ − 962
case AALTS(bs, rs0) =>
+ − 963
rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
+ − 964
case Nil =>
+ − 965
AZERO
+ − 966
case r :: Nil =>
+ − 967
fuse(bs, r)
+ − 968
case rs0p =>
+ − 969
AALTS(bs, rs0p)
+ − 970
}
+ − 971
case r => r
+ − 972
}
+ − 973
}
+ − 974
}
431
+ − 975
591
+ − 976
def prune6cc(acc: Set[Rexp], r: Rexp, ctx: List[Rexp]) : Rexp = {
+ − 977
if (ABIncludedByC(a = r, b = ctx, c = acc,
+ − 978
f = attachCtxcc, subseteqPred = rs1_subseteq_rs2))
+ − 979
{//acc.flatMap(breakIntoTerms
+ − 980
ZERO
+ − 981
}
+ − 982
else{
+ − 983
r match {
+ − 984
case SEQ(r1, r2) =>
+ − 985
(prune6cc(acc, r1, r2 :: ctx)) match{
+ − 986
case ZERO =>
+ − 987
ZERO
+ − 988
case ONE =>
+ − 989
r2
+ − 990
case r1p =>
+ − 991
SEQ(r1p, r2)
+ − 992
}
+ − 993
case ALTS(r1, r2) =>
+ − 994
List(r1, r2).map(r => prune6cc(acc, r, ctx)).filter(_ != AZERO) match {
+ − 995
case Nil =>
+ − 996
ZERO
+ − 997
case r :: Nil =>
+ − 998
r
+ − 999
case ra :: rb :: Nil =>
+ − 1000
ALTS(ra, rb)
+ − 1001
}
+ − 1002
case r => r
+ − 1003
}
+ − 1004
}
+ − 1005
}
+ − 1006
+ − 1007
def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) :
+ − 1008
List[ARexp] = xs match {
+ − 1009
case Nil =>
+ − 1010
Nil
+ − 1011
case x :: xs => {
+ − 1012
val erased = erase(x)
+ − 1013
if(acc.contains(erased)){
+ − 1014
distinctBy6(xs, acc)
+ − 1015
}
+ − 1016
else{
+ − 1017
val pruned = prune6(acc, x)
+ − 1018
val newTerms = turnIntoTerms(erase(pruned))
+ − 1019
pruned match {
+ − 1020
case AZERO =>
+ − 1021
distinctBy6(xs, acc)
+ − 1022
case xPrime =>
+ − 1023
xPrime :: distinctBy6(xs, newTerms ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 1024
}
+ − 1025
}
+ − 1026
}
+ − 1027
}
+ − 1028
+ − 1029
def distinctByacc(xs: List[Rexp], acc: Set[Rexp] = Set()) : Set[Rexp] = xs match {
+ − 1030
case Nil =>
+ − 1031
acc
+ − 1032
case x :: xs => {
+ − 1033
if(acc.contains(x)){
+ − 1034
distinctByacc(xs, acc)
+ − 1035
}
+ − 1036
else{
+ − 1037
val pruned = prune6cc(acc, x, Nil)
+ − 1038
val newTerms = turnIntoTerms(pruned)
+ − 1039
pruned match {
+ − 1040
case ZERO =>
+ − 1041
distinctByacc(xs, acc)
+ − 1042
case xPrime =>
+ − 1043
distinctByacc(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 1044
}
+ − 1045
}
+ − 1046
}
+ − 1047
}
+ − 1048
+ − 1049
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 1050
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 1051
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ − 1052
case ZERO => Nil
+ − 1053
case _ => r::Nil
+ − 1054
}
+ − 1055
+ − 1056
def flatsIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 1057
//case SEQ(r1, r2) => flatsIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 1058
case ALTS(r1, r2) => flatsIntoTerms(r1) ::: flatsIntoTerms(r2)
+ − 1059
case ZERO => Nil
+ − 1060
case _ => r::Nil
+ − 1061
}
+ − 1062
+ − 1063
+ − 1064
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 1065
case (ONE, bs) => (Empty, bs)
+ − 1066
case (CHAR(f), bs) => (Chr(f), bs)
+ − 1067
case (ALTS(r1, r2), Z::bs1) => {
+ − 1068
val (v, bs2) = decode_aux(r1, bs1)
+ − 1069
(Left(v), bs2)
+ − 1070
}
+ − 1071
case (ALTS(r1, r2), S::bs1) => {
+ − 1072
val (v, bs2) = decode_aux(r2, bs1)
+ − 1073
(Right(v), bs2)
+ − 1074
}
+ − 1075
case (SEQ(r1, r2), bs) => {
+ − 1076
val (v1, bs1) = decode_aux(r1, bs)
+ − 1077
val (v2, bs2) = decode_aux(r2, bs1)
+ − 1078
(Sequ(v1, v2), bs2)
+ − 1079
}
+ − 1080
case (STAR(r1), S::bs) => {
+ − 1081
val (v, bs1) = decode_aux(r1, bs)
+ − 1082
//(v)
+ − 1083
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 1084
(Stars(v::vs), bs2)
+ − 1085
}
+ − 1086
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 1087
case (RECD(x, r1), bs) => {
+ − 1088
val (v, bs1) = decode_aux(r1, bs)
+ − 1089
(Rec(x, v), bs1)
+ − 1090
}
+ − 1091
case (NOT(r), bs) => (Nots(r.toString), bs)
+ − 1092
}
+ − 1093
+ − 1094
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 1095
case (v, Nil) => v
+ − 1096
case _ => throw new Exception("Not decodable")
+ − 1097
}
492
+ − 1098
+ − 1099
+ − 1100
591
+ − 1101
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 1102
val bit_code = blex_simp(internalise(r), s.toList)
+ − 1103
decode(r, bit_code)
+ − 1104
}
+ − 1105
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 1106
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 1107
}
431
+ − 1108
591
+ − 1109
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 1110
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 1111
}
431
+ − 1112
591
+ − 1113
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 1114
decode(r, strong_blex_simp5(internalise(r), s.toList))
492
+ − 1115
}
431
+ − 1116
+ − 1117
492
+ − 1118
+ − 1119
591
+ − 1120
//def blexerStrong(r: ARexp, s: String) : Val = {
+ − 1121
// Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 1122
//}
+ − 1123
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 1124
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 1125
}
+ − 1126
+ − 1127
def strongBlexer(r: Rexp, s: String) : Option[Val] = {
+ − 1128
Try(Some(decode(r, strong_blex_simp(internalise(r), s.toList)))).getOrElse(None)
+ − 1129
}
+ − 1130
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1131
case Nil => {
+ − 1132
if (bnullable(r)) {
+ − 1133
mkepsBC(r)
+ − 1134
}
+ − 1135
else
+ − 1136
throw new Exception("Not matched")
+ − 1137
}
+ − 1138
case c::cs => {
+ − 1139
strong_blex_simp(strongBsimp(bder(c, r)), cs)
+ − 1140
}
+ − 1141
}
+ − 1142
+ − 1143
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1144
case Nil => {
+ − 1145
if (bnullable(r)) {
+ − 1146
val bits = mkepsBC(r)
+ − 1147
bits
+ − 1148
}
+ − 1149
else
+ − 1150
throw new Exception("Not matched")
+ − 1151
}
+ − 1152
case c::cs => {
+ − 1153
val der_res = bder(c,r)
+ − 1154
val simp_res = strongBsimp5(der_res)
+ − 1155
strong_blex_simp5(simp_res, cs)
+ − 1156
}
+ − 1157
}
+ − 1158
+ − 1159
+ − 1160
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1161
case Nil => r
+ − 1162
case c::s =>
+ − 1163
//println(erase(r))
+ − 1164
bders_simp(s, bsimp(bder(c, r)))
+ − 1165
}
+ − 1166
+ − 1167
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1168
case Nil => r
+ − 1169
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 1170
}
+ − 1171
def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1172
case Nil => r
+ − 1173
case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
+ − 1174
}
+ − 1175
//Conjecture: [| bdersStrong(s, r) |] = O([| r |]^3)
+ − 1176
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1177
case Nil => r
+ − 1178
case c::s => bdersStrong(s, bsimpStrong(bder(c, r)))
+ − 1179
}
+ − 1180
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 1181
+ − 1182
//def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1183
// case Nil => r
+ − 1184
// case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
+ − 1185
//}
+ − 1186
+ − 1187
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
+ − 1188
+ − 1189
def erase(r:ARexp): Rexp = r match{
+ − 1190
case AZERO => ZERO
+ − 1191
case AONE(_) => ONE
+ − 1192
case ACHAR(bs, c) => CHAR(c)
+ − 1193
case AALTS(bs, Nil) => ZERO
+ − 1194
case AALTS(bs, a::Nil) => erase(a)
+ − 1195
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 1196
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 1197
case ASTAR(cs, r)=> STAR(erase(r))
+ − 1198
case ANOT(bs, r) => NOT(erase(r))
+ − 1199
case AANYCHAR(bs) => ANYCHAR
+ − 1200
}
+ − 1201
+ − 1202
+ − 1203
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 1204
case CHAR(c) => true
+ − 1205
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 1206
case _ => false
+ − 1207
}
+ − 1208
+ − 1209
def flattenSeq(r: Rexp) : String = r match {
+ − 1210
case CHAR(c) => c.toString
+ − 1211
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 1212
case _ => throw new Error("flatten unflattenable rexp")
+ − 1213
}
+ − 1214
+ − 1215
def shortRexpOutput(r: Rexp) : String = r match {
+ − 1216
case CHAR(c) => c.toString
+ − 1217
case ONE => "1"
+ − 1218
case ZERO => "0"
+ − 1219
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 1220
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 1221
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 1222
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 1223
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 1224
//case RTOP => "RTOP"
+ − 1225
}
+ − 1226
+ − 1227
+ − 1228
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1229
case Nil => {
+ − 1230
if (bnullable(r)) {
+ − 1231
val bits = mkepsBC(r)
+ − 1232
bits
+ − 1233
}
+ − 1234
else
+ − 1235
throw new Exception("Not matched")
+ − 1236
}
+ − 1237
case c::cs => {
+ − 1238
val der_res = bder(c,r)
+ − 1239
val simp_res = bsimp(der_res)
+ − 1240
blex_simp(simp_res, cs)
+ − 1241
}
+ − 1242
}
+ − 1243
+ − 1244
+ − 1245
+ − 1246
+ − 1247
def size(r: Rexp) : Int = r match {
+ − 1248
case ZERO => 1
+ − 1249
case ONE => 1
+ − 1250
case CHAR(_) => 1
+ − 1251
case ANYCHAR => 1
+ − 1252
case NOT(r0) => 1 + size(r0)
+ − 1253
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 1254
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 1255
case STAR(r) => 1 + size(r)
+ − 1256
}
+ − 1257
+ − 1258
def asize(a: ARexp) = size(erase(a))
+ − 1259
+ − 1260
//pder related
+ − 1261
type Mon = (Char, Rexp)
+ − 1262
type Lin = Set[Mon]
+ − 1263
+ − 1264
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 1265
case ZERO => Set()
+ − 1266
case ONE => rs
+ − 1267
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 1268
}
+ − 1269
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 1270
case ZERO => Set()
+ − 1271
// case ONE => l
+ − 1272
case t => l.map( m => m._2 match
+ − 1273
{
+ − 1274
case ZERO => m
+ − 1275
case ONE => (m._1, t)
+ − 1276
case p => (m._1, SEQ(p, t))
+ − 1277
}
+ − 1278
+ − 1279
)
+ − 1280
}
+ − 1281
def cir_prodList(l : List[Mon], t: Rexp): List[Mon] = t match {
+ − 1282
case ZERO => Nil
+ − 1283
case ONE => l
+ − 1284
case t => l.map(m => m._2 match
+ − 1285
{
+ − 1286
case ZERO => m
+ − 1287
case ONE => (m._1, t)
+ − 1288
case p => (m._1, SEQ(p, t))
+ − 1289
}
+ − 1290
)
+ − 1291
+ − 1292
}
+ − 1293
def lfList(r: Rexp) : List[Mon] = r match {
+ − 1294
case ZERO => Nil
+ − 1295
case ONE => Nil
+ − 1296
case CHAR(f) => {
+ − 1297
(f, ONE) :: Nil
+ − 1298
}
+ − 1299
case ALTS(r1, r2) => {
+ − 1300
lfList(r1) ++ lfList(r2)
+ − 1301
}
+ − 1302
case STAR(r1) => cir_prodList(lfList(r1), STAR(r1))
+ − 1303
case SEQ(r1, r2) => {
+ − 1304
if(nullable(r1))
+ − 1305
cir_prodList(lfList(r1), r2) ++ lfList(r2)
+ − 1306
else
+ − 1307
cir_prodList(lfList(r1), r2)
+ − 1308
}
+ − 1309
}
+ − 1310
def lfprint(ls: Lin) = ls.foreach(m =>{
+ − 1311
println(m._1)
+ − 1312
rprint(m._2)
+ − 1313
})
+ − 1314
def lf(r: Rexp): Lin = r match {
+ − 1315
case ZERO => Set()
+ − 1316
case ONE => Set()
+ − 1317
case CHAR(f) => {
+ − 1318
//val Some(c) = alphabet.find(f)
+ − 1319
Set((f, ONE))
+ − 1320
}
+ − 1321
case ALTS(r1, r2) => {
+ − 1322
lf(r1 ) ++ lf(r2)
+ − 1323
}
+ − 1324
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 1325
case SEQ(r1, r2) =>{
+ − 1326
if (nullable(r1))
+ − 1327
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 1328
else
+ − 1329
cir_prod(lf(r1), r2)
+ − 1330
}
+ − 1331
}
+ − 1332
def lfs(r: Set[Rexp]): Lin = {
+ − 1333
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
431
+ − 1334
}
+ − 1335
591
+ − 1336
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 1337
val lft = lf(t)
+ − 1338
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
543
+ − 1339
}
591
+ − 1340
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 1341
case x::xs => pders(xs, pder(x, t))
+ − 1342
case Nil => Set(t)
543
+ − 1343
}
591
+ − 1344
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 1345
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1346
case Nil => ts
543
+ − 1347
}
591
+ − 1348
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] =
+ − 1349
ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 1350
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 1351
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 1352
//because they don't include the original regular term before they are pdered.
+ − 1353
//now only pderas is fixed.
+ − 1354
def pderas(t: Set[Rexp], d: Int): Set[Rexp] =
+ − 1355
if(d > 0)
+ − 1356
pderas(lfs(t).map(mon => mon._2), d - 1) ++ t
+ − 1357
else
+ − 1358
lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
431
+ − 1359
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 1360
def awidth(r: Rexp) : Int = r match {
+ − 1361
case CHAR(c) => 1
+ − 1362
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 1363
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 1364
case ONE => 0
+ − 1365
case ZERO => 0
+ − 1366
case STAR(r) => awidth(r)
+ − 1367
}
+ − 1368
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 1369
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 1370
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 1371
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 1372
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 1373
case ZERO => Set[Rexp]()
+ − 1374
case ONE => Set[Rexp]()
+ − 1375
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 1376
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 1377
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 1378
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 1379
}
+ − 1380
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 1381
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1382
case Nil => ts
+ − 1383
}
+ − 1384
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 1385
+ − 1386
+ − 1387
591
+ − 1388
def starPrint(r: Rexp) : Unit = r match {
+ − 1389
+ − 1390
case SEQ(head, rstar) =>
+ − 1391
println(shortRexpOutput(head) ++ "~STARREG")
+ − 1392
case STAR(rstar) =>
+ − 1393
println("STARREG")
+ − 1394
case ALTS(r1, r2) =>
+ − 1395
println("(")
+ − 1396
starPrint(r1)
+ − 1397
println("+")
+ − 1398
starPrint(r2)
+ − 1399
println(")")
+ − 1400
case ZERO => println("0")
+ − 1401
}
+ − 1402
+ − 1403
// @arg(doc = "small tests")
+ − 1404
def n_astar_list(d: Int) : Rexp = {
+ − 1405
if(d == 0) STAR("a")
+ − 1406
else ALTS(STAR("a" * d), n_astar_list(d - 1))
+ − 1407
}
+ − 1408
def n_astar_alts(d: Int) : Rexp = d match {
+ − 1409
case 0 => n_astar_list(0)
+ − 1410
case d => STAR(n_astar_list(d))
+ − 1411
}
+ − 1412
def n_astar_alts2(d: Int) : Rexp = d match {
+ − 1413
case 0 => n_astar_list(0)
+ − 1414
case d => STAR(STAR(n_astar_list(d)))
+ − 1415
}
+ − 1416
def n_astar_aux(d: Int) = {
+ − 1417
if(d == 0) n_astar_alts(0)
+ − 1418
else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
+ − 1419
}
+ − 1420
+ − 1421
def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
+ − 1422
//val STARREG = n_astar(3)
+ − 1423
// ( STAR("a") |
+ − 1424
// ("a" | "aa").% |
+ − 1425
// ( "a" | "aa" | "aaa").%
+ − 1426
// ).%
+ − 1427
//( "a" | "aa" | "aaa" | "aaaa").% |
+ − 1428
//( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").%
+ − 1429
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 1430
+ − 1431
// @main
+ − 1432
+ − 1433
def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
+ − 1434
(a, b) => b * a /
+ − 1435
Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
+ − 1436
}
+ − 1437
+ − 1438
def small() = {
+ − 1439
//val pderSTAR = pderUNIV(STARREG)
+ − 1440
+ − 1441
//val refSize = pderSTAR.map(size(_)).sum
+ − 1442
for(n <- 5 to 5){
+ − 1443
val STARREG = n_astar_alts2(n)
+ − 1444
val iMax = (lcm((1 to n).toList))
+ − 1445
for(i <- 0 to iMax ){// 100, 400, 800, 840, 841, 900
+ − 1446
val prog0 = "a" * i
+ − 1447
//println(s"test: $prog0")
+ − 1448
print(i)
+ − 1449
print(" ")
+ − 1450
// print(i)
+ − 1451
// print(" ")
+ − 1452
println(asize(bders_simp(prog0.toList, internalise(STARREG))))
+ − 1453
// println(asize(bdersStrong(prog0.toList, internalise(STARREG))))
431
+ − 1454
}
+ − 1455
591
+ − 1456
}
533
+ − 1457
}
591
+ − 1458
// small()
492
+ − 1459
591
+ − 1460
def aaa_star() = {
+ − 1461
val r = STAR(("a" | "aa"))
+ − 1462
for(i <- 0 to 20) {
+ − 1463
val prog = "a" * i
+ − 1464
print(i+" ")
+ − 1465
println(asize(bders_simp(prog.toList, internalise(r))))
+ − 1466
//println(size(ders_simp(prog.toList, r)))
516
+ − 1467
}
492
+ − 1468
}
591
+ − 1469
// aaa_star()
536
+ − 1470
591
+ − 1471
def matching_ways_counting(n: Int): Int =
573
+ − 1472
{
+ − 1473
if(n == 0) 1
+ − 1474
else if(n == 1) 2
+ − 1475
else (for(i <- 1 to n - 1)
+ − 1476
yield matching_ways_counting(i) * matching_ways_counting(n - i) ).sum + (n + 1)
+ − 1477
}
591
+ − 1478
def naive_matcher() {
+ − 1479
val r = STAR(STAR("a") ~ STAR("a"))
536
+ − 1480
591
+ − 1481
// for(i <- 0 to 10) {
+ − 1482
// val s = "a" * i
+ − 1483
// val t1 = System.nanoTime
+ − 1484
// matcher(s, r)
+ − 1485
// val duration = (System.nanoTime - t1) / 1e9d
+ − 1486
// print(i)
+ − 1487
// print(" ")
+ − 1488
// // print(duration)
+ − 1489
// // print(" ")
+ − 1490
// (aprint(bders_simp(s.toList, internalise(r))))
+ − 1491
// //print(size(ders_simp(s.toList, r)))
+ − 1492
// println()
+ − 1493
// }
+ − 1494
for(i <- 1 to 3) {
+ − 1495
val s = "a" * i
+ − 1496
val t1 = System.nanoTime
+ − 1497
val derssimp_result = ders_simp(s.toList, r)
+ − 1498
println(i)
+ − 1499
println(matching_ways_counting(i))
+ − 1500
println(size(derssimp_result))
+ − 1501
rprint(derssimp_result)
+ − 1502
}
+ − 1503
573
+ − 1504
}
591
+ − 1505
naive_matcher()
+ − 1506
//single(SEQ(SEQ(STAR(CHAR('b')),STAR(STAR(SEQ(CHAR('a'),CHAR('b'))))),
+ − 1507
// SEQ(SEQ(CHAR('b'),STAR(ALTS(CHAR('a'),ONE))),ONE)))
543
+ − 1508
+ − 1509
591
+ − 1510
//STAR(SEQ(ALTS(CHAR(b),STAR(CHAR(b))),ALTS(ALTS(ONE,CHAR(b)),SEQ(CHAR(c),ONE))))
+ − 1511
def generator_test() {
492
+ − 1512
591
+ − 1513
test(single(STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))), 1) { (r: Rexp) =>
+ − 1514
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
+ − 1515
val ss = Set("ab")//stringsFromRexp(r)
+ − 1516
val boolList = ss.map(s => {
+ − 1517
//val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1518
val bdStrong6 = bdersStrong6(s.toList, internalise(r))
+ − 1519
val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
+ − 1520
val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
+ − 1521
val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
+ − 1522
(rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList)) ||
+ − 1523
bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
+ − 1524
rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken))//|| bdStrong6Set.size <= pdersSetBroken.size
+ − 1525
+ − 1526
})
+ − 1527
//!boolList.exists(b => b == false)
+ − 1528
!boolList.exists(b => b == false)
+ − 1529
}
+ − 1530
}
+ − 1531
// small()
+ − 1532
// generator_test()
+ − 1533
543
+ − 1534
+ − 1535
591
+ − 1536
//STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))),
+ − 1537
// CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))),
+ − 1538
// CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
+ − 1539
//counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b)))))
+ − 1540
//counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b))))
541
+ − 1541
591
+ − 1542
//new ce1 : STAR(SEQ(ALTS(ALTS(ONE,CHAR(a)),SEQ(ONE,CHAR(b))),ALTS(CHAR(a),ALTS(CHAR(b),CHAR(a)))))
+ − 1543
//new ce2 : ALTS(CHAR(b),SEQ(ALTS(ZERO,ALTS(CHAR(b),CHAR(b))),ALTS(ALTS(CHAR(a),CHAR(b)),SEQ(CHAR(c),ONE))))
+ − 1544
//new ce3 : SEQ(CHAR(b),ALTS(ALTS(ALTS(ONE,CHAR(a)),SEQ(CHAR(c),ONE)),SEQ(STAR(ZERO),SEQ(ONE,CHAR(b)))))
+ − 1545
//circular double-nullable STAR(SEQ((STAR("b") | "a" | "b"), ("b" | ONE)))
+ − 1546
def counterexample_check() {
+ − 1547
val r = STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
+ − 1548
ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
+ − 1549
//ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
+ − 1550
val s = "ab"
+ − 1551
val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1552
val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1553
val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1554
val apdersSet = pdersSet.map(internalise)
+ − 1555
println("original regex ")
+ − 1556
rprint(r)
+ − 1557
println("after strong bsimp")
+ − 1558
aprint(bdStrong5)
+ − 1559
println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ")
+ − 1560
rsprint(bdStrong5Set)
+ − 1561
println("after pderUNIV")
+ − 1562
rsprint(pdersSet.toList)
+ − 1563
println("pderUNIV distinctBy6")
+ − 1564
//asprint(distinctBy6(apdersSet.toList))
+ − 1565
rsprint((pdersSet.toList).flatMap(turnIntoTerms))
+ − 1566
// rsprint(turnIntoTerms(pdersSet.toList(3)))
+ − 1567
// println("NO 3 not into terms")
+ − 1568
// rprint((pdersSet.toList()))
+ − 1569
// println("after pderUNIV broken")
+ − 1570
// rsprint(pdersSet.flatMap(r => turnIntoTerms(r)).toList)
532
+ − 1571
591
+ − 1572
}
543
+ − 1573
591
+ − 1574
// counterexample_check()
543
+ − 1575
591
+ − 1576
def lf_display() {
+ − 1577
val r = STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
+ − 1578
ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
+ − 1579
//ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
+ − 1580
val s = "ab"
+ − 1581
val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1582
val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1583
val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1584
val apdersSet = pdersSet.map(internalise)
+ − 1585
lfprint(lf(r))
543
+ − 1586
591
+ − 1587
}
543
+ − 1588
591
+ − 1589
lf_display()
541
+ − 1590
591
+ − 1591
def breakable(r: Rexp) : Boolean = r match {
+ − 1592
case SEQ(ALTS(_, _), _) => true
+ − 1593
case SEQ(r1, r2) => breakable(r1)
+ − 1594
case _ => false
+ − 1595
}
532
+ − 1596
591
+ − 1597
def linform_test() {
+ − 1598
val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
+ − 1599
val r_linforms = lf(r)
+ − 1600
println(r_linforms.size)
+ − 1601
val pderuniv = pderUNIV(r)
+ − 1602
println(pderuniv.size)
+ − 1603
}
+ − 1604
// linform_test()
+ − 1605
// 1
+ − 1606
def newStrong_test() {
+ − 1607
val r2 = (CHAR('b') | ONE)
+ − 1608
val r0 = CHAR('d')
+ − 1609
val r1 = (ONE | CHAR('c'))
+ − 1610
val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
+ − 1611
println(s"original regex is: ")
+ − 1612
rprint(expRexp)
+ − 1613
val expSimp5 = strongBsimp5(internalise(expRexp))
+ − 1614
val expSimp6 = strongBsimp6(internalise(expRexp))
+ − 1615
aprint(expSimp5)
+ − 1616
aprint(expSimp6)
+ − 1617
}
+ − 1618
// newStrong_test()
+ − 1619
// SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1620
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1621
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1622
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
493
+ − 1623
+ − 1624
591
+ − 1625
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1626
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
543
+ − 1627
+ − 1628
+ − 1629
+ − 1630
+ − 1631
591
+ − 1632
def bders_bderssimp() {
543
+ − 1633
591
+ − 1634
test(single(SEQ(ALTS(ONE,CHAR('c')),
+ − 1635
SEQ(SEQ(CHAR('a'),CHAR('a')),ALTS(ONE,CHAR('c'))))), 1) { (r: Rexp) =>
+ − 1636
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
+ − 1637
val ss = Set("aa")//stringsFromRexp(r)
+ − 1638
val boolList = ss.map(s => {
+ − 1639
//val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1640
val bds = bsimp(bders(s.toList, internalise(r)))
+ − 1641
val bdssimp = bsimp(bders_simp(s.toList, internalise(r)))
+ − 1642
//val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
+ − 1643
//val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
+ − 1644
//rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList))
+ − 1645
//bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
+ − 1646
//rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken)//|| bdStrong6Set.size <= pdersSetBroken.size
+ − 1647
rprint(r)
+ − 1648
println(bds)
+ − 1649
println(bdssimp)
+ − 1650
bds == bdssimp
+ − 1651
})
+ − 1652
//!boolList.exists(b => b == false)
+ − 1653
!boolList.exists(b => b == false)
+ − 1654
}
+ − 1655
}
+ − 1656
// bders_bderssimp()
543
+ − 1657
+ − 1658
591
+ − 1659