518
+ − 1
//Strong Bsimp to obtain Antimirov's cubic bound
+ − 2
431
+ − 3
// A simple lexer inspired by work of Sulzmann & Lu
+ − 4
//==================================================
+ − 5
//
+ − 6
// Call the test cases with
+ − 7
//
+ − 8
// amm lexer.sc small
+ − 9
// amm lexer.sc fib
+ − 10
// amm lexer.sc loops
+ − 11
// amm lexer.sc email
+ − 12
//
+ − 13
// amm lexer.sc all
+ − 14
514
+ − 15
431
+ − 16
+ − 17
// regular expressions including records
+ − 18
abstract class Rexp
+ − 19
case object ZERO extends Rexp
+ − 20
case object ONE extends Rexp
+ − 21
case object ANYCHAR extends Rexp
+ − 22
case class CHAR(c: Char) extends Rexp
+ − 23
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
539
+ − 24
case class ALTSS(rs: List[Rexp]) extends Rexp
431
+ − 25
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 26
case class STAR(r: Rexp) extends Rexp
+ − 27
case class RECD(x: String, r: Rexp) extends Rexp
+ − 28
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 29
case class OPTIONAL(r: Rexp) extends Rexp
+ − 30
case class NOT(r: Rexp) extends Rexp
591
+ − 31
// records for extracting strings or tokens
+ − 32
431
+ − 33
// values
+ − 34
abstract class Val
492
+ − 35
case object Failure extends Val
431
+ − 36
case object Empty extends Val
+ − 37
case class Chr(c: Char) extends Val
+ − 38
case class Sequ(v1: Val, v2: Val) extends Val
+ − 39
case class Left(v: Val) extends Val
+ − 40
case class Right(v: Val) extends Val
+ − 41
case class Stars(vs: List[Val]) extends Val
+ − 42
case class Rec(x: String, v: Val) extends Val
+ − 43
case class Ntime(vs: List[Val]) extends Val
+ − 44
case class Optionall(v: Val) extends Val
+ − 45
case class Nots(s: String) extends Val
+ − 46
+ − 47
+ − 48
+ − 49
abstract class Bit
+ − 50
case object Z extends Bit
+ − 51
case object S extends Bit
+ − 52
+ − 53
+ − 54
type Bits = List[Bit]
+ − 55
+ − 56
abstract class ARexp
+ − 57
case object AZERO extends ARexp
+ − 58
case class AONE(bs: Bits) extends ARexp
+ − 59
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 60
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 61
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 62
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 63
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 64
case class AANYCHAR(bs: Bits) extends ARexp
+ − 65
514
+ − 66
import scala.util.Try
+ − 67
492
+ − 68
trait Generator[+T] {
591
+ − 69
self => // an alias for "this"
492
+ − 70
def generate(): T
591
+ − 71
492
+ − 72
def gen(n: Int) : List[T] =
+ − 73
if (n == 0) Nil else self.generate() :: gen(n - 1)
591
+ − 74
492
+ − 75
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 76
def generate = f(self.generate())
+ − 77
}
+ − 78
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 79
def generate = f(self.generate()).generate()
+ − 80
}
+ − 81
+ − 82
+ − 83
}
+ − 84
591
+ − 85
// tests a property according to a given random generator
+ − 86
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 87
for (_ <- 0 until amount) {
+ − 88
val value = r.generate()
+ − 89
assert(pred(value), s"Test failed for: $value")
492
+ − 90
}
591
+ − 91
println(s"Test passed $amount times")
+ − 92
}
+ − 93
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 94
for (_ <- 0 until amount) {
+ − 95
val valueR = r.generate()
+ − 96
val valueS = s.generate()
+ − 97
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
492
+ − 98
}
591
+ − 99
println(s"Test passed $amount times")
+ − 100
}
492
+ − 101
+ − 102
// random integers
+ − 103
val integers = new Generator[Int] {
+ − 104
val rand = new java.util.Random
+ − 105
def generate() = rand.nextInt()
+ − 106
}
+ − 107
+ − 108
// random booleans
+ − 109
val booleans = integers.map(_ > 0)
591
+ − 110
492
+ − 111
// random integers in the range lo and high
+ − 112
def range(lo: Int, hi: Int): Generator[Int] =
+ − 113
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 114
+ − 115
// random characters
+ − 116
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 117
val chars = chars_range('a', 'z')
+ − 118
+ − 119
+ − 120
def oneOf[T](xs: T*): Generator[T] =
+ − 121
for (idx <- range(0, xs.length)) yield xs(idx)
591
+ − 122
492
+ − 123
def single[T](x: T) = new Generator[T] {
+ − 124
def generate() = x
+ − 125
}
+ − 126
+ − 127
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 128
for (x <- t; y <- u) yield (x, y)
+ − 129
+ − 130
// lists
+ − 131
def emptyLists = single(Nil)
+ − 132
+ − 133
def nonEmptyLists : Generator[List[Int]] =
+ − 134
for (head <- integers; tail <- lists) yield head :: tail
+ − 135
+ − 136
def lists: Generator[List[Int]] = for {
+ − 137
kind <- booleans
+ − 138
list <- if (kind) emptyLists else nonEmptyLists
+ − 139
} yield list
+ − 140
+ − 141
def char_list(len: Int): Generator[List[Char]] = {
+ − 142
if(len <= 0) single(Nil)
+ − 143
else{
+ − 144
for {
500
+ − 145
c <- chars_range('a', 'c')
492
+ − 146
tail <- char_list(len - 1)
+ − 147
} yield c :: tail
+ − 148
}
+ − 149
}
+ − 150
+ − 151
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 152
493
+ − 153
def sampleString(r: Rexp) : List[String] = r match {
+ − 154
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 155
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 156
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 157
case ONE => "" :: Nil
+ − 158
case ZERO => Nil
+ − 159
case CHAR(c) => c.toString :: Nil
+ − 160
+ − 161
}
+ − 162
+ − 163
def stringsFromRexp(r: Rexp) : List[String] =
+ − 164
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 165
492
+ − 166
+ − 167
// (simple) binary trees
+ − 168
trait Tree[T]
+ − 169
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 170
case class Leaf[T](x: T) extends Tree[T]
+ − 171
+ − 172
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 173
for (x <- t) yield Leaf(x)
+ − 174
+ − 175
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 176
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 177
+ − 178
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 179
for (kind <- range(0, 2);
+ − 180
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 181
+ − 182
// regular expressions
+ − 183
+ − 184
// generates random leaf-regexes; prefers CHAR-regexes
+ − 185
def leaf_rexp() : Generator[Rexp] =
+ − 186
for (kind <- range(0, 5);
+ − 187
c <- chars_range('a', 'd')) yield
591
+ − 188
kind match {
+ − 189
case 0 => ZERO
+ − 190
case 1 => ONE
+ − 191
case _ => CHAR(c)
+ − 192
}
492
+ − 193
+ − 194
// generates random inner regexes with maximum depth d
+ − 195
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 196
for (kind <- range(0, 3);
+ − 197
l <- rexp(d);
+ − 198
r <- rexp(d))
591
+ − 199
yield kind match {
+ − 200
case 0 => ALTS(l, r)
+ − 201
case 1 => SEQ(l, r)
+ − 202
case 2 => STAR(r)
+ − 203
}
492
+ − 204
+ − 205
// generates random regexes with maximum depth d;
+ − 206
// prefers inner regexes in 2/3 of the cases
+ − 207
def rexp(d: Int = 100): Generator[Rexp] =
+ − 208
for (kind <- range(0, 3);
+ − 209
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 210
+ − 211
+ − 212
// some test functions for rexps
+ − 213
def height(r: Rexp) : Int = r match {
+ − 214
case ZERO => 1
+ − 215
case ONE => 1
+ − 216
case CHAR(_) => 1
+ − 217
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 218
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 219
case STAR(r) => 1 + height(r)
+ − 220
}
+ − 221
+ − 222
// def size(r: Rexp) : Int = r match {
+ − 223
// case ZERO => 1
+ − 224
// case ONE => 1
+ − 225
// case CHAR(_) => 1
+ − 226
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 227
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 228
// case STAR(r) => 1 + size(r)
+ − 229
// }
+ − 230
+ − 231
// randomly subtracts 1 or 2 from the STAR case
+ − 232
def size_faulty(r: Rexp) : Int = r match {
+ − 233
case ZERO => 1
+ − 234
case ONE => 1
+ − 235
case CHAR(_) => 1
+ − 236
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 237
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 238
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 239
}
+ − 240
431
+ − 241
591
+ − 242
431
+ − 243
// some convenience for typing in regular expressions
+ − 244
+ − 245
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 246
case Nil => ONE
+ − 247
case c::Nil => CHAR(c)
+ − 248
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 249
}
+ − 250
implicit def string2rexp(s : String) : Rexp =
+ − 251
charlist2rexp(s.toList)
+ − 252
591
+ − 253
implicit def RexpOps(r: Rexp) = new {
+ − 254
def | (s: Rexp) = ALTS(r, s)
+ − 255
def % = STAR(r)
+ − 256
def ~ (s: Rexp) = SEQ(r, s)
+ − 257
}
431
+ − 258
591
+ − 259
implicit def stringOps(s: String) = new {
+ − 260
def | (r: Rexp) = ALTS(s, r)
+ − 261
def | (r: String) = ALTS(s, r)
+ − 262
def % = STAR(s)
+ − 263
def ~ (r: Rexp) = SEQ(s, r)
+ − 264
def ~ (r: String) = SEQ(s, r)
+ − 265
def $ (r: Rexp) = RECD(s, r)
+ − 266
}
431
+ − 267
591
+ − 268
def nullable(r: Rexp) : Boolean = r match {
+ − 269
case ZERO => false
+ − 270
case ONE => true
+ − 271
case CHAR(_) => false
+ − 272
case ANYCHAR => false
+ − 273
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 274
case ALTSS(rs) => rs.exists(nullable)
+ − 275
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 276
case STAR(_) => true
+ − 277
case RECD(_, r1) => nullable(r1)
+ − 278
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 279
case OPTIONAL(r) => true
+ − 280
case NOT(r) => !nullable(r)
+ − 281
}
431
+ − 282
591
+ − 283
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 284
case ZERO => ZERO
+ − 285
case ONE => ZERO
+ − 286
case CHAR(d) => if (c == d) ONE else ZERO
+ − 287
case ANYCHAR => ONE
+ − 288
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 289
case ALTSS(rs) => ALTSS(rs.map(der(c, _)))
+ − 290
case SEQ(r1, r2) =>
+ − 291
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 292
else SEQ(der(c, r1), r2)
+ − 293
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 294
case RECD(_, r1) => der(c, r1)
+ − 295
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 296
case OPTIONAL(r) => der(c, r)
+ − 297
case NOT(r) => NOT(der(c, r))
+ − 298
}
431
+ − 299
591
+ − 300
def ders(s: List[Char], r: Rexp) : Rexp = s match {
+ − 301
case Nil => r
+ − 302
case c :: cs => ders(cs, der(c, r))
+ − 303
}
539
+ − 304
591
+ − 305
def ders_simp(s: List[Char], r: Rexp) : Rexp = s match {
+ − 306
case Nil => simp(r)
+ − 307
case c :: cs => ders_simp(cs, simp(der(c, r)))
+ − 308
}
539
+ − 309
+ − 310
591
+ − 311
def simp2(r: Rexp) : Rexp = r match {
+ − 312
case SEQ(r1, r2) =>
+ − 313
(simp2(r1), simp2(r2)) match {
+ − 314
case (ZERO, _) => ZERO
+ − 315
case (_, ZERO) => ZERO
+ − 316
case (ONE, r2s) => r2s
+ − 317
case (r1s, ONE) => r1s
+ − 318
case (r1s, r2s) =>
+ − 319
SEQ(r1s, r2s)
+ − 320
}
+ − 321
case ALTS(r1, r2) =>
+ − 322
val r1r2s = fltsplain(simp2(r1) :: simp2(r2) :: Nil).distinct
+ − 323
r1r2s match {
+ − 324
case Nil => ZERO
+ − 325
case r :: Nil => r
+ − 326
case r1 :: r2 :: rs =>
+ − 327
ALTSS(r1 :: r2 :: rs)
+ − 328
}
+ − 329
case ALTSS(rs) =>
+ − 330
// println(rs)
+ − 331
(fltsplain(rs.map(simp2))).distinct match {
+ − 332
case Nil => ZERO
+ − 333
case r :: Nil => r
+ − 334
case r1 :: r2 :: rs =>
+ − 335
ALTSS(r1 :: r2 :: rs)
+ − 336
}
+ − 337
case r => r
+ − 338
}
539
+ − 339
+ − 340
591
+ − 341
def simp(r: Rexp) : Rexp = r match {
+ − 342
case SEQ(r1, r2) =>
+ − 343
(simp(r1), simp(r2)) match {
+ − 344
case (ZERO, _) => ZERO
+ − 345
case (_, ZERO) => ZERO
+ − 346
case (ONE, r2s) => r2s
+ − 347
case (r1s, ONE) => r1s
+ − 348
case (r1s, r2s) => SEQ(r1s, r2s)
+ − 349
}
+ − 350
case ALTS(r1, r2) => {
+ − 351
(simp(r1), simp(r2)) match {
+ − 352
case (ZERO, r2s) => r2s
+ − 353
case (r1s, ZERO) => r1s
+ − 354
case (r1s, r2s) =>
+ − 355
if(r1s == r2s) r1s else ALTS(r1s, r2s)
+ − 356
}
+ − 357
}
+ − 358
case r => r
539
+ − 359
}
+ − 360
591
+ − 361
def fltsplain(rs: List[Rexp]) : List[Rexp] = rs match {
+ − 362
case Nil => Nil
+ − 363
case ZERO :: rs => fltsplain(rs)
+ − 364
case ALTS(r1, r2) :: rs => r1 :: r2 :: fltsplain(rs)
+ − 365
case ALTSS(rs) :: rs1 => rs ::: fltsplain(rs1)
+ − 366
case r :: rs => r :: fltsplain(rs)
+ − 367
}
539
+ − 368
+ − 369
+ − 370
+ − 371
591
+ − 372
def matcher(s: String, r: Rexp) : Boolean =
+ − 373
nullable(ders(s.toList, r))
539
+ − 374
591
+ − 375
def simp_matcher(s: String, r: Rexp) : Boolean =
+ − 376
nullable(ders_simp(s.toList, r))
539
+ − 377
431
+ − 378
591
+ − 379
// extracts a string from a value
+ − 380
def flatten(v: Val) : String = v match {
+ − 381
case Empty => ""
+ − 382
case Chr(c) => c.toString
+ − 383
case Left(v) => flatten(v)
+ − 384
case Right(v) => flatten(v)
+ − 385
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 386
case Stars(vs) => vs.map(flatten).mkString
+ − 387
case Ntime(vs) => vs.map(flatten).mkString
+ − 388
case Optionall(v) => flatten(v)
+ − 389
case Rec(_, v) => flatten(v)
+ − 390
}
431
+ − 391
+ − 392
591
+ − 393
// extracts an environment from a value;
+ − 394
// used for tokenising a string
+ − 395
def env(v: Val) : List[(String, String)] = v match {
+ − 396
case Empty => Nil
+ − 397
case Chr(c) => Nil
+ − 398
case Left(v) => env(v)
+ − 399
case Right(v) => env(v)
+ − 400
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 401
case Stars(vs) => vs.flatMap(env)
+ − 402
case Ntime(vs) => vs.flatMap(env)
+ − 403
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 404
case Optionall(v) => env(v)
+ − 405
case Nots(s) => ("Negative", s) :: Nil
+ − 406
}
431
+ − 407
+ − 408
591
+ − 409
// The injection and mkeps part of the lexer
+ − 410
//===========================================
431
+ − 411
591
+ − 412
def mkeps(r: Rexp) : Val = r match {
+ − 413
case ONE => Empty
+ − 414
case ALTS(r1, r2) =>
+ − 415
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 416
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 417
case STAR(r) => Stars(Nil)
+ − 418
case RECD(x, r) => Rec(x, mkeps(r))
+ − 419
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 420
case OPTIONAL(r) => Optionall(Empty)
+ − 421
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 422
else Nots("")//Nots(s.reverse.toString)
+ − 423
// case NOT(ZERO) => Empty
+ − 424
// case NOT(CHAR(c)) => Empty
+ − 425
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 426
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 427
// case NOT(STAR(r)) => Stars(Nil)
431
+ − 428
591
+ − 429
}
431
+ − 430
591
+ − 431
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 432
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 433
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 434
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 435
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 436
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 437
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 438
case (CHAR(d), Empty) => Chr(c)
+ − 439
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 440
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 441
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 442
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 443
case (ANYCHAR, Empty) => Chr(c)
+ − 444
}
431
+ − 445
591
+ − 446
// some "rectification" functions for simplification
431
+ − 447
+ − 448
+ − 449
+ − 450
591
+ − 451
// The Lexing Rules for the WHILE Language
431
+ − 452
+ − 453
// bnullable function: tests whether the aregular
+ − 454
// expression can recognise the empty string
591
+ − 455
def bnullable (r: ARexp) : Boolean = r match {
431
+ − 456
case AZERO => false
+ − 457
case AONE(_) => true
+ − 458
case ACHAR(_,_) => false
+ − 459
case AALTS(_, rs) => rs.exists(bnullable)
+ − 460
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 461
case ASTAR(_, _) => true
+ − 462
case ANOT(_, rn) => !bnullable(rn)
+ − 463
}
+ − 464
591
+ − 465
def mkepsBC(r: ARexp) : Bits = r match {
431
+ − 466
case AONE(bs) => bs
+ − 467
case AALTS(bs, rs) => {
+ − 468
val n = rs.indexWhere(bnullable)
+ − 469
bs ++ mkepsBC(rs(n))
+ − 470
}
+ − 471
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 472
case ASTAR(bs, r) => bs ++ List(Z)
+ − 473
case ANOT(bs, rn) => bs
+ − 474
}
+ − 475
+ − 476
591
+ − 477
def bder(c: Char, r: ARexp) : ARexp = r match {
431
+ − 478
case AZERO => AZERO
+ − 479
case AONE(_) => AZERO
+ − 480
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 481
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 482
case ASEQ(bs, r1, r2) =>
+ − 483
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 484
else ASEQ(bs, bder(c, r1), r2)
+ − 485
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 486
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 487
case AANYCHAR(bs) => AONE(bs)
+ − 488
}
+ − 489
591
+ − 490
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
431
+ − 491
case AZERO => AZERO
+ − 492
case AONE(cs) => AONE(bs ++ cs)
+ − 493
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 494
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 495
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 496
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 497
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 498
}
+ − 499
+ − 500
591
+ − 501
def internalise(r: Rexp) : ARexp = r match {
431
+ − 502
case ZERO => AZERO
+ − 503
case ONE => AONE(Nil)
+ − 504
case CHAR(c) => ACHAR(Nil, c)
+ − 505
//case PRED(f) => APRED(Nil, f)
+ − 506
case ALTS(r1, r2) =>
+ − 507
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
591
+ − 508
// case ALTS(r1::rs) => {
+ − 509
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 510
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 511
// }
431
+ − 512
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 513
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 514
case RECD(x, r) => internalise(r)
+ − 515
case NOT(r) => ANOT(Nil, internalise(r))
+ − 516
case ANYCHAR => AANYCHAR(Nil)
+ − 517
}
+ − 518
+ − 519
591
+ − 520
def bsimp(r: ARexp): ARexp =
431
+ − 521
{
+ − 522
r match {
+ − 523
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
492
+ − 524
case (AZERO, _) => AZERO
+ − 525
case (_, AZERO) => AZERO
+ − 526
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 527
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 528
}
+ − 529
case AALTS(bs1, rs) => {
+ − 530
val rs_simp = rs.map(bsimp(_))
+ − 531
val flat_res = flats(rs_simp)
+ − 532
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 533
dist_res match {
+ − 534
case Nil => AZERO
+ − 535
case s :: Nil => fuse(bs1, s)
+ − 536
case rs => AALTS(bs1, rs)
+ − 537
}
+ − 538
+ − 539
}
+ − 540
case r => r
431
+ − 541
}
591
+ − 542
}
+ − 543
def strongBsimp(r: ARexp): ARexp =
+ − 544
{
+ − 545
r match {
+ − 546
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 547
case (AZERO, _) => AZERO
+ − 548
case (_, AZERO) => AZERO
+ − 549
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 550
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 551
}
+ − 552
case AALTS(bs1, rs) => {
492
+ − 553
val rs_simp = rs.map(strongBsimp(_))
+ − 554
val flat_res = flats(rs_simp)
+ − 555
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 556
dist_res match {
+ − 557
case Nil => AZERO
+ − 558
case s :: Nil => fuse(bs1, s)
+ − 559
case rs => AALTS(bs1, rs)
+ − 560
}
591
+ − 561
+ − 562
}
+ − 563
case r => r
492
+ − 564
}
431
+ − 565
}
+ − 566
591
+ − 567
def strongBsimp5(r: ARexp): ARexp =
+ − 568
{
+ − 569
// println("was this called?")
+ − 570
r match {
+ − 571
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
494
+ − 572
case (AZERO, _) => AZERO
+ − 573
case (_, AZERO) => AZERO
+ − 574
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 575
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 576
}
+ − 577
case AALTS(bs1, rs) => {
+ − 578
// println("alts case")
494
+ − 579
val rs_simp = rs.map(strongBsimp5(_))
+ − 580
val flat_res = flats(rs_simp)
500
+ − 581
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
518
+ − 582
// var dist2_res = distinctBy5(dist_res)
+ − 583
// while(dist_res != dist2_res){
+ − 584
// dist_res = dist2_res
+ − 585
// dist2_res = distinctBy5(dist_res)
+ − 586
// }
+ − 587
(dist_res) match {
+ − 588
case Nil => AZERO
+ − 589
case s :: Nil => fuse(bs1, s)
+ − 590
case rs => AALTS(bs1, rs)
500
+ − 591
}
591
+ − 592
}
+ − 593
case r => r
518
+ − 594
}
+ − 595
}
+ − 596
591
+ − 597
def strongBsimp6(r: ARexp): ARexp =
+ − 598
{
+ − 599
// println("was this called?")
+ − 600
r match {
+ − 601
case ASEQ(bs1, r1, r2) => (strongBsimp6(r1), strongBsimp6(r2)) match {
518
+ − 602
case (AZERO, _) => AZERO
+ − 603
case (_, AZERO) => AZERO
+ − 604
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
530
+ − 605
case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil
532
+ − 606
//case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s)
518
+ − 607
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 608
}
+ − 609
case AALTS(bs1, rs) => {
+ − 610
// println("alts case")
518
+ − 611
val rs_simp = rs.map(strongBsimp6(_))
+ − 612
val flat_res = flats(rs_simp)
+ − 613
var dist_res = distinctBy6(flat_res)//distinctBy(flat_res, erase)
+ − 614
(dist_res) match {
494
+ − 615
case Nil => AZERO
+ − 616
case s :: Nil => fuse(bs1, s)
+ − 617
case rs => AALTS(bs1, rs)
+ − 618
}
591
+ − 619
}
+ − 620
//(erase(r0))) => AONE(bs)
+ − 621
case r => r
494
+ − 622
}
532
+ − 623
}
+ − 624
591
+ − 625
+ − 626
def atMostEmpty(r: Rexp) : Boolean = r match {
+ − 627
case ZERO => true
+ − 628
case ONE => true
+ − 629
case STAR(r) => atMostEmpty(r)
+ − 630
case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+ − 631
case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2)
+ − 632
case CHAR(_) => false
+ − 633
}
+ − 634
533
+ − 635
591
+ − 636
def isOne(r: Rexp) : Boolean = r match {
+ − 637
case ONE => true
+ − 638
case SEQ(r1, r2) => isOne(r1) && isOne(r2)
+ − 639
case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
+ − 640
case STAR(r0) => atMostEmpty(r0)
+ − 641
case CHAR(c) => false
+ − 642
case ZERO => false
+ − 643
}
+ − 644
+ − 645
def distinctWith(rs: List[ARexp],
+ − 646
pruneFunction: (ARexp, Set[Rexp]) => ARexp,
+ − 647
acc: Set[Rexp] = Set()) : List[ARexp] =
+ − 648
rs match{
+ − 649
case Nil => Nil
+ − 650
case r :: rs =>
+ − 651
if(acc(erase(r)))
+ − 652
distinctWith(rs, pruneFunction, acc)
+ − 653
else {
+ − 654
val pruned_r = pruneFunction(r, acc)
+ − 655
pruned_r ::
+ − 656
distinctWith(rs,
+ − 657
pruneFunction,
+ − 658
turnIntoTerms(erase(pruned_r)) ++: acc
+ − 659
)
+ − 660
}
+ − 661
}
532
+ − 662
591
+ − 663
//r = r' ~ tail' : If tail' matches tail => returns r'
628
+ − 664
def removeSeqTail(r: Rexp, tail: Rexp) : Rexp =
+ − 665
if (r == tail)
+ − 666
ONE
+ − 667
else {
+ − 668
r match {
+ − 669
case SEQ(r1, r2) =>
+ − 670
if(r2 == tail)
+ − 671
r1
+ − 672
else
+ − 673
ZERO
+ − 674
case r => ZERO
+ − 675
}
+ − 676
}
532
+ − 677
591
+ − 678
def prune(r: ARexp, acc: Set[Rexp]) : ARexp = r match{
628
+ − 679
case AALTS(bs, rs) => rs.map(r => prune(r, acc)).filter(_ != AZERO) match
591
+ − 680
{
+ − 681
//all components have been removed, meaning this is effectively a duplicate
+ − 682
//flats will take care of removing this AZERO
+ − 683
case Nil => AZERO
+ − 684
case r::Nil => fuse(bs, r)
+ − 685
case rs1 => AALTS(bs, rs1)
+ − 686
}
+ − 687
case ASEQ(bs, r1, r2) =>
+ − 688
//remove the r2 in (ra + rb)r2 to identify the duplicate contents of r1
+ − 689
prune(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match {
+ − 690
//after pruning, returns 0
+ − 691
case AZERO => AZERO
+ − 692
//after pruning, got r1'.r2, where r1' is equal to 1
+ − 693
case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2)
+ − 694
//assemble the pruned head r1p with r2
+ − 695
case r1p => ASEQ(bs, r1p, r2)
+ − 696
}
+ − 697
//this does the duplicate component removal task
+ − 698
case r => if(acc(erase(r))) AZERO else r
+ − 699
}
+ − 700
+ − 701
//a stronger version of simp
+ − 702
def bsimpStrong(r: ARexp): ARexp =
+ − 703
{
+ − 704
r match {
+ − 705
case ASEQ(bs1, r1, r2) => (bsimpStrong(r1), bsimpStrong(r2)) match {
+ − 706
//normal clauses same as simp
532
+ − 707
case (AZERO, _) => AZERO
+ − 708
case (_, AZERO) => AZERO
+ − 709
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
591
+ − 710
//bs2 can be discarded
+ − 711
case (r1s, AONE(bs2)) => fuse(bs1, r1s) //assert bs2 == Nil
532
+ − 712
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
591
+ − 713
}
+ − 714
case AALTS(bs1, rs) => {
+ − 715
//distinctBy(flat_res, erase)
+ − 716
distinctWith(flats(rs.map(bsimpStrong(_))), prune) match {
532
+ − 717
case Nil => AZERO
+ − 718
case s :: Nil => fuse(bs1, s)
+ − 719
case rs => AALTS(bs1, rs)
+ − 720
}
591
+ − 721
}
+ − 722
//stars that can be treated as 1
+ − 723
case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs)
+ − 724
case r => r
+ − 725
}
532
+ − 726
}
431
+ − 727
+ − 728
591
+ − 729
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 730
case Nil => r
+ − 731
case c::s => bders(s, bder(c, r))
+ − 732
}
492
+ − 733
591
+ − 734
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 735
case Nil => Nil
+ − 736
case AZERO :: rs1 => flats(rs1)
+ − 737
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 738
case r1 :: rs2 => r1 :: flats(rs2)
+ − 739
}
+ − 740
+ − 741
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 742
case Nil => Nil
+ − 743
case (x::xs) => {
+ − 744
val res = f(x)
+ − 745
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 746
else x::distinctBy(xs, f, res::acc)
492
+ − 747
}
591
+ − 748
}
431
+ − 749
+ − 750
591
+ − 751
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 752
r match {
+ − 753
case ASEQ(bs, r1, r2) =>
+ − 754
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 755
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 756
})
+ − 757
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 758
pruned match {
+ − 759
case AZERO => AZERO
+ − 760
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 761
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 762
}
+ − 763
+ − 764
case AALTS(bs, rs) =>
+ − 765
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 766
val rsp = rs.map(r =>
+ − 767
pruneRexp(r, allowableTerms)
+ − 768
)
+ − 769
.filter(r => r != AZERO)
+ − 770
rsp match {
+ − 771
case Nil => AZERO
+ − 772
case r1::Nil => fuse(bs, r1)
+ − 773
case rs1 => AALTS(bs, rs1)
+ − 774
}
+ − 775
case r =>
+ − 776
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
492
+ − 777
}
431
+ − 778
}
591
+ − 779
+ − 780
def oneSimp(r: Rexp) : Rexp = r match {
+ − 781
case SEQ(ONE, r) => r
+ − 782
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 783
case r => r//assert r != 0
+ − 784
+ − 785
}
+ − 786
492
+ − 787
591
+ − 788
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 789
case Nil => Nil
+ − 790
case x :: xs =>
+ − 791
//assert(acc.distinct == acc)
+ − 792
val erased = erase(x)
+ − 793
if(acc.contains(erased))
+ − 794
distinctBy4(xs, acc)
+ − 795
else{
+ − 796
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 797
//val xp = pruneRexp(x, addToAcc)
+ − 798
pruneRexp(x, addToAcc) match {
+ − 799
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 800
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 801
}
+ − 802
}
+ − 803
}
494
+ − 804
591
+ − 805
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 806
// where
+ − 807
// "pAKC_aux rsa r ctx = (if (seqFold (SEQ (erase r) ( ctx) )) ⊆ (seqFold (erase (AALTs [] rsa))) then AZERO else
+ − 808
// case r of (ASEQ bs r1 r2) ⇒
+ − 809
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 810
// (AALTs bs rs) ⇒
+ − 811
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 812
// r ⇒ r
+ − 813
+ − 814
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 815
// case r::Nil => SEQ(r, acc)
+ − 816
// case Nil => acc
+ − 817
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 818
// }
494
+ − 819
+ − 820
591
+ − 821
//the "fake" Language interpretation: just concatenates!
+ − 822
def seqFold(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 823
case Nil => acc
+ − 824
case r :: rs1 =>
+ − 825
// if(acc == ONE)
+ − 826
// seqFold(r, rs1)
+ − 827
// else
+ − 828
seqFold(SEQ(acc, r), rs1)
+ − 829
}
494
+ − 830
591
+ − 831
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
+ − 832
def rsprint(rs: Iterable[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
500
+ − 833
591
+ − 834
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 835
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
500
+ − 836
591
+ − 837
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 838
// println("pakc")
+ − 839
// println(shortRexpOutput(erase(r)))
+ − 840
// println("acc")
+ − 841
// rsprint(acc)
+ − 842
// println("ctx---------")
+ − 843
// rsprint(ctx)
+ − 844
// println("ctx---------end")
+ − 845
// rsprint(breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp))
+ − 846
+ − 847
if (breakIntoTerms(seqFold(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
+ − 848
AZERO
494
+ − 849
}
591
+ − 850
else{
+ − 851
r match {
+ − 852
case ASEQ(bs, r1, r2) =>
+ − 853
(pAKC(acc, r1, erase(r2) :: ctx)) match{
+ − 854
case AZERO =>
+ − 855
AZERO
+ − 856
case AONE(bs1) =>
+ − 857
fuse(bs1, r2)
+ − 858
case r1p =>
+ − 859
ASEQ(bs, r1p, r2)
+ − 860
}
+ − 861
case AALTS(bs, rs0) =>
+ − 862
// println("before pruning")
+ − 863
// println(s"ctx is ")
+ − 864
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 865
// println(s"rs0 is ")
+ − 866
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 867
// println(s"acc is ")
+ − 868
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 869
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
+ − 870
case Nil =>
+ − 871
// println("after pruning Nil")
+ − 872
AZERO
+ − 873
case r :: Nil =>
+ − 874
// println("after pruning singleton")
+ − 875
// println(shortRexpOutput(erase(r)))
+ − 876
r
+ − 877
case rs0p =>
+ − 878
// println("after pruning non-singleton")
+ − 879
AALTS(bs, rs0p)
+ − 880
}
+ − 881
case r => r
494
+ − 882
}
591
+ − 883
}
494
+ − 884
}
+ − 885
591
+ − 886
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 887
case Nil =>
+ − 888
Nil
+ − 889
case x :: xs => {
+ − 890
val erased = erase(x)
+ − 891
if(acc.contains(erased)){
494
+ − 892
distinctBy5(xs, acc)
591
+ − 893
}
+ − 894
else{
+ − 895
val pruned = pAKC(acc, x, Nil)
+ − 896
val newTerms = breakIntoTerms(erase(pruned))
+ − 897
pruned match {
+ − 898
case AZERO =>
+ − 899
distinctBy5(xs, acc)
+ − 900
case xPrime =>
+ − 901
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 902
}
+ − 903
}
494
+ − 904
}
+ − 905
}
431
+ − 906
+ − 907
591
+ − 908
def isOne1(r: Rexp) : Boolean = r match {
+ − 909
case ONE => true
+ − 910
case SEQ(r1, r2) => false//isOne1(r1) && isOne1(r2)
+ − 911
case ALTS(r1, r2) => false//(isOne1(r1) || isOne1(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne)
+ − 912
case STAR(r0) => false//atMostEmpty(r0)
+ − 913
case CHAR(c) => false
+ − 914
case ZERO => false
+ − 915
+ − 916
}
+ − 917
+ − 918
def turnIntoTerms(r: Rexp): List[Rexp] = r match {
628
+ − 919
case SEQ(r1, r2) =>
+ − 920
turnIntoTerms(r1).flatMap(r11 => furtherSEQ(r11, r2))
+ − 921
case ALTS(r1, r2) => turnIntoTerms(r1) ::: turnIntoTerms(r2)
+ − 922
case ZERO => Nil
+ − 923
case _ => r :: Nil
591
+ − 924
}
+ − 925
+ − 926
def furtherSEQ(r11: Rexp, r2: Rexp) : List[Rexp] = {
+ − 927
if(r11 == ONE)
+ − 928
turnIntoTerms(r2)
+ − 929
else
+ − 930
SEQ(r11, r2) :: Nil
+ − 931
}
+ − 932
+ − 933
def attachCtx(r: ARexp, ctx: List[Rexp]) : Set[Rexp] = {
+ − 934
turnIntoTerms((seqFold(erase(r), ctx)))
+ − 935
.toSet
+ − 936
}
+ − 937
+ − 938
def attachCtxcc(r: Rexp, ctx: List[Rexp]) : Set[Rexp] =
+ − 939
turnIntoTerms(seqFold(r, ctx)).toSet
+ − 940
+ − 941
def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C,
+ − 942
subseteqPred: (C, C) => Boolean) : Boolean = {
+ − 943
subseteqPred(f(a, b), c)
+ − 944
}
+ − 945
def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean =
+ − 946
//rs1 \subseteq rs2
+ − 947
rs1.forall(rs2.contains)
+ − 948
+ − 949
def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp] = Nil) : ARexp = {
+ − 950
if (ABIncludedByC(a = r, b = ctx, c = acc,
+ − 951
f = attachCtx, subseteqPred = rs1_subseteq_rs2))
+ − 952
{
+ − 953
AZERO
+ − 954
}
+ − 955
else{
+ − 956
r match {
+ − 957
case ASEQ(bs, r1, r2) => (prune6(acc, r1, erase(r2) :: ctx)) match{
+ − 958
case AZERO =>
+ − 959
AZERO
+ − 960
case AONE(bs1) => //when r1 becomes 1, chances to further prune r2
+ − 961
prune6(acc, fuse(bs1, r2), ctx)
+ − 962
case r1p =>
+ − 963
ASEQ(bs, r1p, r2)
+ − 964
}
+ − 965
case AALTS(bs, rs0) =>
+ − 966
rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match {
+ − 967
case Nil =>
+ − 968
AZERO
+ − 969
case r :: Nil =>
+ − 970
fuse(bs, r)
+ − 971
case rs0p =>
+ − 972
AALTS(bs, rs0p)
+ − 973
}
+ − 974
case r => r
+ − 975
}
+ − 976
}
+ − 977
}
431
+ − 978
591
+ − 979
def prune6cc(acc: Set[Rexp], r: Rexp, ctx: List[Rexp]) : Rexp = {
+ − 980
if (ABIncludedByC(a = r, b = ctx, c = acc,
+ − 981
f = attachCtxcc, subseteqPred = rs1_subseteq_rs2))
+ − 982
{//acc.flatMap(breakIntoTerms
+ − 983
ZERO
+ − 984
}
+ − 985
else{
+ − 986
r match {
+ − 987
case SEQ(r1, r2) =>
+ − 988
(prune6cc(acc, r1, r2 :: ctx)) match{
+ − 989
case ZERO =>
+ − 990
ZERO
+ − 991
case ONE =>
+ − 992
r2
+ − 993
case r1p =>
+ − 994
SEQ(r1p, r2)
+ − 995
}
+ − 996
case ALTS(r1, r2) =>
+ − 997
List(r1, r2).map(r => prune6cc(acc, r, ctx)).filter(_ != AZERO) match {
+ − 998
case Nil =>
+ − 999
ZERO
+ − 1000
case r :: Nil =>
+ − 1001
r
+ − 1002
case ra :: rb :: Nil =>
+ − 1003
ALTS(ra, rb)
+ − 1004
}
+ − 1005
case r => r
+ − 1006
}
+ − 1007
}
+ − 1008
}
+ − 1009
+ − 1010
def distinctBy6(xs: List[ARexp], acc: Set[Rexp] = Set()) :
+ − 1011
List[ARexp] = xs match {
+ − 1012
case Nil =>
+ − 1013
Nil
+ − 1014
case x :: xs => {
+ − 1015
val erased = erase(x)
+ − 1016
if(acc.contains(erased)){
+ − 1017
distinctBy6(xs, acc)
+ − 1018
}
+ − 1019
else{
+ − 1020
val pruned = prune6(acc, x)
+ − 1021
val newTerms = turnIntoTerms(erase(pruned))
+ − 1022
pruned match {
+ − 1023
case AZERO =>
+ − 1024
distinctBy6(xs, acc)
+ − 1025
case xPrime =>
+ − 1026
xPrime :: distinctBy6(xs, newTerms ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 1027
}
+ − 1028
}
+ − 1029
}
+ − 1030
}
+ − 1031
+ − 1032
def distinctByacc(xs: List[Rexp], acc: Set[Rexp] = Set()) : Set[Rexp] = xs match {
+ − 1033
case Nil =>
+ − 1034
acc
+ − 1035
case x :: xs => {
+ − 1036
if(acc.contains(x)){
+ − 1037
distinctByacc(xs, acc)
+ − 1038
}
+ − 1039
else{
+ − 1040
val pruned = prune6cc(acc, x, Nil)
+ − 1041
val newTerms = turnIntoTerms(pruned)
+ − 1042
pruned match {
+ − 1043
case ZERO =>
+ − 1044
distinctByacc(xs, acc)
+ − 1045
case xPrime =>
+ − 1046
distinctByacc(xs, newTerms.map(oneSimp) ++: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 1047
}
+ − 1048
}
+ − 1049
}
+ − 1050
}
+ − 1051
+ − 1052
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 1053
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 1054
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ − 1055
case ZERO => Nil
+ − 1056
case _ => r::Nil
+ − 1057
}
+ − 1058
+ − 1059
def flatsIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 1060
//case SEQ(r1, r2) => flatsIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 1061
case ALTS(r1, r2) => flatsIntoTerms(r1) ::: flatsIntoTerms(r2)
+ − 1062
case ZERO => Nil
+ − 1063
case _ => r::Nil
+ − 1064
}
+ − 1065
+ − 1066
+ − 1067
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 1068
case (ONE, bs) => (Empty, bs)
+ − 1069
case (CHAR(f), bs) => (Chr(f), bs)
+ − 1070
case (ALTS(r1, r2), Z::bs1) => {
+ − 1071
val (v, bs2) = decode_aux(r1, bs1)
+ − 1072
(Left(v), bs2)
+ − 1073
}
+ − 1074
case (ALTS(r1, r2), S::bs1) => {
+ − 1075
val (v, bs2) = decode_aux(r2, bs1)
+ − 1076
(Right(v), bs2)
+ − 1077
}
+ − 1078
case (SEQ(r1, r2), bs) => {
+ − 1079
val (v1, bs1) = decode_aux(r1, bs)
+ − 1080
val (v2, bs2) = decode_aux(r2, bs1)
+ − 1081
(Sequ(v1, v2), bs2)
+ − 1082
}
+ − 1083
case (STAR(r1), S::bs) => {
+ − 1084
val (v, bs1) = decode_aux(r1, bs)
+ − 1085
//(v)
+ − 1086
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 1087
(Stars(v::vs), bs2)
+ − 1088
}
+ − 1089
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 1090
case (RECD(x, r1), bs) => {
+ − 1091
val (v, bs1) = decode_aux(r1, bs)
+ − 1092
(Rec(x, v), bs1)
+ − 1093
}
+ − 1094
case (NOT(r), bs) => (Nots(r.toString), bs)
+ − 1095
}
+ − 1096
+ − 1097
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 1098
case (v, Nil) => v
+ − 1099
case _ => throw new Exception("Not decodable")
+ − 1100
}
492
+ − 1101
+ − 1102
+ − 1103
591
+ − 1104
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 1105
val bit_code = blex_simp(internalise(r), s.toList)
+ − 1106
decode(r, bit_code)
+ − 1107
}
+ − 1108
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 1109
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 1110
}
431
+ − 1111
591
+ − 1112
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 1113
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 1114
}
431
+ − 1115
591
+ − 1116
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 1117
decode(r, strong_blex_simp5(internalise(r), s.toList))
492
+ − 1118
}
431
+ − 1119
+ − 1120
492
+ − 1121
+ − 1122
591
+ − 1123
//def blexerStrong(r: ARexp, s: String) : Val = {
+ − 1124
// Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 1125
//}
+ − 1126
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 1127
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 1128
}
+ − 1129
+ − 1130
def strongBlexer(r: Rexp, s: String) : Option[Val] = {
+ − 1131
Try(Some(decode(r, strong_blex_simp(internalise(r), s.toList)))).getOrElse(None)
+ − 1132
}
+ − 1133
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1134
case Nil => {
+ − 1135
if (bnullable(r)) {
+ − 1136
mkepsBC(r)
+ − 1137
}
+ − 1138
else
+ − 1139
throw new Exception("Not matched")
+ − 1140
}
+ − 1141
case c::cs => {
+ − 1142
strong_blex_simp(strongBsimp(bder(c, r)), cs)
+ − 1143
}
+ − 1144
}
+ − 1145
+ − 1146
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1147
case Nil => {
+ − 1148
if (bnullable(r)) {
+ − 1149
val bits = mkepsBC(r)
+ − 1150
bits
+ − 1151
}
+ − 1152
else
+ − 1153
throw new Exception("Not matched")
+ − 1154
}
+ − 1155
case c::cs => {
+ − 1156
val der_res = bder(c,r)
+ − 1157
val simp_res = strongBsimp5(der_res)
+ − 1158
strong_blex_simp5(simp_res, cs)
+ − 1159
}
+ − 1160
}
+ − 1161
+ − 1162
+ − 1163
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1164
case Nil => r
+ − 1165
case c::s =>
+ − 1166
//println(erase(r))
+ − 1167
bders_simp(s, bsimp(bder(c, r)))
+ − 1168
}
+ − 1169
+ − 1170
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1171
case Nil => r
+ − 1172
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 1173
}
+ − 1174
def bdersStrong6(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1175
case Nil => r
+ − 1176
case c::s => bdersStrong6(s, strongBsimp6(bder(c, r)))
+ − 1177
}
+ − 1178
//Conjecture: [| bdersStrong(s, r) |] = O([| r |]^3)
+ − 1179
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1180
case Nil => r
+ − 1181
case c::s => bdersStrong(s, bsimpStrong(bder(c, r)))
+ − 1182
}
+ − 1183
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 1184
+ − 1185
//def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
+ − 1186
// case Nil => r
+ − 1187
// case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
+ − 1188
//}
+ − 1189
+ − 1190
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
+ − 1191
+ − 1192
def erase(r:ARexp): Rexp = r match{
+ − 1193
case AZERO => ZERO
+ − 1194
case AONE(_) => ONE
+ − 1195
case ACHAR(bs, c) => CHAR(c)
+ − 1196
case AALTS(bs, Nil) => ZERO
+ − 1197
case AALTS(bs, a::Nil) => erase(a)
+ − 1198
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 1199
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 1200
case ASTAR(cs, r)=> STAR(erase(r))
+ − 1201
case ANOT(bs, r) => NOT(erase(r))
+ − 1202
case AANYCHAR(bs) => ANYCHAR
+ − 1203
}
+ − 1204
+ − 1205
+ − 1206
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 1207
case CHAR(c) => true
+ − 1208
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 1209
case _ => false
+ − 1210
}
+ − 1211
+ − 1212
def flattenSeq(r: Rexp) : String = r match {
+ − 1213
case CHAR(c) => c.toString
+ − 1214
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 1215
case _ => throw new Error("flatten unflattenable rexp")
+ − 1216
}
+ − 1217
+ − 1218
def shortRexpOutput(r: Rexp) : String = r match {
+ − 1219
case CHAR(c) => c.toString
+ − 1220
case ONE => "1"
+ − 1221
case ZERO => "0"
+ − 1222
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 1223
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 1224
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 1225
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 1226
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 1227
//case RTOP => "RTOP"
+ − 1228
}
+ − 1229
+ − 1230
+ − 1231
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 1232
case Nil => {
+ − 1233
if (bnullable(r)) {
+ − 1234
val bits = mkepsBC(r)
+ − 1235
bits
+ − 1236
}
+ − 1237
else
+ − 1238
throw new Exception("Not matched")
+ − 1239
}
+ − 1240
case c::cs => {
+ − 1241
val der_res = bder(c,r)
+ − 1242
val simp_res = bsimp(der_res)
+ − 1243
blex_simp(simp_res, cs)
+ − 1244
}
+ − 1245
}
+ − 1246
+ − 1247
+ − 1248
+ − 1249
+ − 1250
def size(r: Rexp) : Int = r match {
+ − 1251
case ZERO => 1
+ − 1252
case ONE => 1
+ − 1253
case CHAR(_) => 1
+ − 1254
case ANYCHAR => 1
+ − 1255
case NOT(r0) => 1 + size(r0)
+ − 1256
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 1257
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 1258
case STAR(r) => 1 + size(r)
+ − 1259
}
+ − 1260
+ − 1261
def asize(a: ARexp) = size(erase(a))
+ − 1262
+ − 1263
//pder related
+ − 1264
type Mon = (Char, Rexp)
+ − 1265
type Lin = Set[Mon]
+ − 1266
+ − 1267
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 1268
case ZERO => Set()
+ − 1269
case ONE => rs
+ − 1270
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 1271
}
+ − 1272
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 1273
case ZERO => Set()
+ − 1274
// case ONE => l
+ − 1275
case t => l.map( m => m._2 match
+ − 1276
{
+ − 1277
case ZERO => m
+ − 1278
case ONE => (m._1, t)
+ − 1279
case p => (m._1, SEQ(p, t))
+ − 1280
}
+ − 1281
+ − 1282
)
+ − 1283
}
+ − 1284
def cir_prodList(l : List[Mon], t: Rexp): List[Mon] = t match {
+ − 1285
case ZERO => Nil
+ − 1286
case ONE => l
+ − 1287
case t => l.map(m => m._2 match
+ − 1288
{
+ − 1289
case ZERO => m
+ − 1290
case ONE => (m._1, t)
+ − 1291
case p => (m._1, SEQ(p, t))
+ − 1292
}
+ − 1293
)
+ − 1294
+ − 1295
}
+ − 1296
def lfList(r: Rexp) : List[Mon] = r match {
+ − 1297
case ZERO => Nil
+ − 1298
case ONE => Nil
+ − 1299
case CHAR(f) => {
+ − 1300
(f, ONE) :: Nil
+ − 1301
}
+ − 1302
case ALTS(r1, r2) => {
+ − 1303
lfList(r1) ++ lfList(r2)
+ − 1304
}
+ − 1305
case STAR(r1) => cir_prodList(lfList(r1), STAR(r1))
+ − 1306
case SEQ(r1, r2) => {
+ − 1307
if(nullable(r1))
+ − 1308
cir_prodList(lfList(r1), r2) ++ lfList(r2)
+ − 1309
else
+ − 1310
cir_prodList(lfList(r1), r2)
+ − 1311
}
+ − 1312
}
+ − 1313
def lfprint(ls: Lin) = ls.foreach(m =>{
+ − 1314
println(m._1)
+ − 1315
rprint(m._2)
+ − 1316
})
+ − 1317
def lf(r: Rexp): Lin = r match {
+ − 1318
case ZERO => Set()
+ − 1319
case ONE => Set()
+ − 1320
case CHAR(f) => {
+ − 1321
//val Some(c) = alphabet.find(f)
+ − 1322
Set((f, ONE))
+ − 1323
}
+ − 1324
case ALTS(r1, r2) => {
+ − 1325
lf(r1 ) ++ lf(r2)
+ − 1326
}
+ − 1327
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 1328
case SEQ(r1, r2) =>{
+ − 1329
if (nullable(r1))
+ − 1330
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 1331
else
+ − 1332
cir_prod(lf(r1), r2)
+ − 1333
}
+ − 1334
}
+ − 1335
def lfs(r: Set[Rexp]): Lin = {
+ − 1336
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
431
+ − 1337
}
+ − 1338
591
+ − 1339
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 1340
val lft = lf(t)
+ − 1341
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
543
+ − 1342
}
591
+ − 1343
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 1344
case x::xs => pders(xs, pder(x, t))
+ − 1345
case Nil => Set(t)
543
+ − 1346
}
591
+ − 1347
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 1348
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1349
case Nil => ts
543
+ − 1350
}
591
+ − 1351
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] =
+ − 1352
ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 1353
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 1354
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 1355
//because they don't include the original regular term before they are pdered.
+ − 1356
//now only pderas is fixed.
+ − 1357
def pderas(t: Set[Rexp], d: Int): Set[Rexp] =
+ − 1358
if(d > 0)
+ − 1359
pderas(lfs(t).map(mon => mon._2), d - 1) ++ t
+ − 1360
else
+ − 1361
lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
431
+ − 1362
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 1363
def awidth(r: Rexp) : Int = r match {
+ − 1364
case CHAR(c) => 1
+ − 1365
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 1366
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 1367
case ONE => 0
+ − 1368
case ZERO => 0
+ − 1369
case STAR(r) => awidth(r)
+ − 1370
}
+ − 1371
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 1372
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 1373
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 1374
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 1375
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 1376
case ZERO => Set[Rexp]()
+ − 1377
case ONE => Set[Rexp]()
+ − 1378
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 1379
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 1380
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 1381
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 1382
}
+ − 1383
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 1384
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 1385
case Nil => ts
+ − 1386
}
+ − 1387
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 1388
+ − 1389
+ − 1390
591
+ − 1391
def starPrint(r: Rexp) : Unit = r match {
+ − 1392
+ − 1393
case SEQ(head, rstar) =>
+ − 1394
println(shortRexpOutput(head) ++ "~STARREG")
+ − 1395
case STAR(rstar) =>
+ − 1396
println("STARREG")
+ − 1397
case ALTS(r1, r2) =>
+ − 1398
println("(")
+ − 1399
starPrint(r1)
+ − 1400
println("+")
+ − 1401
starPrint(r2)
+ − 1402
println(")")
+ − 1403
case ZERO => println("0")
+ − 1404
}
+ − 1405
+ − 1406
// @arg(doc = "small tests")
+ − 1407
def n_astar_list(d: Int) : Rexp = {
+ − 1408
if(d == 0) STAR("a")
+ − 1409
else ALTS(STAR("a" * d), n_astar_list(d - 1))
+ − 1410
}
+ − 1411
def n_astar_alts(d: Int) : Rexp = d match {
+ − 1412
case 0 => n_astar_list(0)
+ − 1413
case d => STAR(n_astar_list(d))
+ − 1414
}
+ − 1415
def n_astar_alts2(d: Int) : Rexp = d match {
+ − 1416
case 0 => n_astar_list(0)
+ − 1417
case d => STAR(STAR(n_astar_list(d)))
+ − 1418
}
+ − 1419
def n_astar_aux(d: Int) = {
+ − 1420
if(d == 0) n_astar_alts(0)
+ − 1421
else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
+ − 1422
}
+ − 1423
+ − 1424
def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
+ − 1425
//val STARREG = n_astar(3)
+ − 1426
// ( STAR("a") |
+ − 1427
// ("a" | "aa").% |
+ − 1428
// ( "a" | "aa" | "aaa").%
+ − 1429
// ).%
+ − 1430
//( "a" | "aa" | "aaa" | "aaaa").% |
+ − 1431
//( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").%
+ − 1432
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 1433
+ − 1434
// @main
+ − 1435
+ − 1436
def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
+ − 1437
(a, b) => b * a /
+ − 1438
Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
+ − 1439
}
+ − 1440
+ − 1441
def small() = {
+ − 1442
//val pderSTAR = pderUNIV(STARREG)
+ − 1443
+ − 1444
//val refSize = pderSTAR.map(size(_)).sum
+ − 1445
for(n <- 5 to 5){
+ − 1446
val STARREG = n_astar_alts2(n)
+ − 1447
val iMax = (lcm((1 to n).toList))
+ − 1448
for(i <- 0 to iMax ){// 100, 400, 800, 840, 841, 900
+ − 1449
val prog0 = "a" * i
+ − 1450
//println(s"test: $prog0")
+ − 1451
print(i)
+ − 1452
print(" ")
+ − 1453
// print(i)
+ − 1454
// print(" ")
+ − 1455
println(asize(bders_simp(prog0.toList, internalise(STARREG))))
+ − 1456
// println(asize(bdersStrong(prog0.toList, internalise(STARREG))))
431
+ − 1457
}
+ − 1458
591
+ − 1459
}
533
+ − 1460
}
591
+ − 1461
// small()
492
+ − 1462
591
+ − 1463
def aaa_star() = {
+ − 1464
val r = STAR(("a" | "aa"))
+ − 1465
for(i <- 0 to 20) {
+ − 1466
val prog = "a" * i
+ − 1467
print(i+" ")
+ − 1468
println(asize(bders_simp(prog.toList, internalise(r))))
+ − 1469
//println(size(ders_simp(prog.toList, r)))
516
+ − 1470
}
492
+ − 1471
}
591
+ − 1472
// aaa_star()
536
+ − 1473
591
+ − 1474
def matching_ways_counting(n: Int): Int =
573
+ − 1475
{
+ − 1476
if(n == 0) 1
+ − 1477
else if(n == 1) 2
+ − 1478
else (for(i <- 1 to n - 1)
+ − 1479
yield matching_ways_counting(i) * matching_ways_counting(n - i) ).sum + (n + 1)
+ − 1480
}
591
+ − 1481
def naive_matcher() {
+ − 1482
val r = STAR(STAR("a") ~ STAR("a"))
536
+ − 1483
591
+ − 1484
// for(i <- 0 to 10) {
+ − 1485
// val s = "a" * i
+ − 1486
// val t1 = System.nanoTime
+ − 1487
// matcher(s, r)
+ − 1488
// val duration = (System.nanoTime - t1) / 1e9d
+ − 1489
// print(i)
+ − 1490
// print(" ")
+ − 1491
// // print(duration)
+ − 1492
// // print(" ")
+ − 1493
// (aprint(bders_simp(s.toList, internalise(r))))
+ − 1494
// //print(size(ders_simp(s.toList, r)))
+ − 1495
// println()
+ − 1496
// }
+ − 1497
for(i <- 1 to 3) {
+ − 1498
val s = "a" * i
+ − 1499
val t1 = System.nanoTime
+ − 1500
val derssimp_result = ders_simp(s.toList, r)
+ − 1501
println(i)
+ − 1502
println(matching_ways_counting(i))
+ − 1503
println(size(derssimp_result))
+ − 1504
rprint(derssimp_result)
+ − 1505
}
+ − 1506
573
+ − 1507
}
639
+ − 1508
// naive_matcher()
591
+ − 1509
//single(SEQ(SEQ(STAR(CHAR('b')),STAR(STAR(SEQ(CHAR('a'),CHAR('b'))))),
+ − 1510
// SEQ(SEQ(CHAR('b'),STAR(ALTS(CHAR('a'),ONE))),ONE)))
543
+ − 1511
+ − 1512
591
+ − 1513
//STAR(SEQ(ALTS(CHAR(b),STAR(CHAR(b))),ALTS(ALTS(ONE,CHAR(b)),SEQ(CHAR(c),ONE))))
+ − 1514
def generator_test() {
492
+ − 1515
591
+ − 1516
test(single(STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))), 1) { (r: Rexp) =>
+ − 1517
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
+ − 1518
val ss = Set("ab")//stringsFromRexp(r)
+ − 1519
val boolList = ss.map(s => {
+ − 1520
//val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1521
val bdStrong6 = bdersStrong6(s.toList, internalise(r))
+ − 1522
val bdStrong6Set = breakIntoTerms(erase(bdStrong6))
+ − 1523
val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
+ − 1524
val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
+ − 1525
(rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList)) ||
+ − 1526
bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
+ − 1527
rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken))//|| bdStrong6Set.size <= pdersSetBroken.size
+ − 1528
+ − 1529
})
+ − 1530
//!boolList.exists(b => b == false)
+ − 1531
!boolList.exists(b => b == false)
+ − 1532
}
+ − 1533
}
+ − 1534
// small()
+ − 1535
// generator_test()
+ − 1536
543
+ − 1537
+ − 1538
591
+ − 1539
//STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))),
+ − 1540
// CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))),
+ − 1541
// CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE)))
+ − 1542
//counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b)))))
+ − 1543
//counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b))))
541
+ − 1544
591
+ − 1545
//new ce1 : STAR(SEQ(ALTS(ALTS(ONE,CHAR(a)),SEQ(ONE,CHAR(b))),ALTS(CHAR(a),ALTS(CHAR(b),CHAR(a)))))
+ − 1546
//new ce2 : ALTS(CHAR(b),SEQ(ALTS(ZERO,ALTS(CHAR(b),CHAR(b))),ALTS(ALTS(CHAR(a),CHAR(b)),SEQ(CHAR(c),ONE))))
+ − 1547
//new ce3 : SEQ(CHAR(b),ALTS(ALTS(ALTS(ONE,CHAR(a)),SEQ(CHAR(c),ONE)),SEQ(STAR(ZERO),SEQ(ONE,CHAR(b)))))
+ − 1548
//circular double-nullable STAR(SEQ((STAR("b") | "a" | "b"), ("b" | ONE)))
+ − 1549
def counterexample_check() {
628
+ − 1550
val r : Rexp = SEQ(ALTS(ONE,
+ − 1551
SEQ(ALTS(CHAR('f'), CHAR('b')), CHAR('g'))), SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e'))))//(1+(f +b)·g)·(a·(d·e))//STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
+ − 1552
val acc : Set[Rexp] = Set(SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e'))), SEQ(SEQ(CHAR('f'), CHAR('g')), SEQ(CHAR('a'), SEQ(CHAR('d'), CHAR('e')))) )
+ − 1553
val a = internalise(r)
+ − 1554
aprint(prune(a, acc));
+ − 1555
//ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
591
+ − 1556
//ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
628
+ − 1557
// val s = "ab"
+ − 1558
// val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1559
// val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1560
// val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1561
// val apdersSet = pdersSet.map(internalise)
+ − 1562
// println("original regex ")
+ − 1563
// rprint(r)
+ − 1564
// println("after strong bsimp")
+ − 1565
// aprint(bdStrong5)
+ − 1566
// println("turned into a set %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ")
+ − 1567
// rsprint(bdStrong5Set)
+ − 1568
// println("after pderUNIV")
+ − 1569
// rsprint(pdersSet.toList)
+ − 1570
// println("pderUNIV distinctBy6")
+ − 1571
// //asprint(distinctBy6(apdersSet.toList))
+ − 1572
// rsprint((pdersSet.toList).flatMap(turnIntoTerms))
591
+ − 1573
// rsprint(turnIntoTerms(pdersSet.toList(3)))
+ − 1574
// println("NO 3 not into terms")
+ − 1575
// rprint((pdersSet.toList()))
+ − 1576
// println("after pderUNIV broken")
+ − 1577
// rsprint(pdersSet.flatMap(r => turnIntoTerms(r)).toList)
532
+ − 1578
591
+ − 1579
}
543
+ − 1580
591
+ − 1581
// counterexample_check()
543
+ − 1582
591
+ − 1583
def lf_display() {
+ − 1584
val r = STAR(SEQ(ALTS(STAR(CHAR('b')),ALTS(CHAR('b'),CHAR('a'))),
+ − 1585
ALTS(ALTS(CHAR('b'),CHAR('c')),STAR(ZERO))))//SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))),
+ − 1586
//ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b')))
+ − 1587
val s = "ab"
+ − 1588
val bdStrong5 = bdersStrong6(s.toList, internalise(r))
+ − 1589
val bdStrong5Set = breakIntoTerms(erase(bdStrong5))
+ − 1590
val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => turnIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r))
+ − 1591
val apdersSet = pdersSet.map(internalise)
+ − 1592
lfprint(lf(r))
543
+ − 1593
591
+ − 1594
}
543
+ − 1595
628
+ − 1596
// lf_display()
541
+ − 1597
591
+ − 1598
def breakable(r: Rexp) : Boolean = r match {
+ − 1599
case SEQ(ALTS(_, _), _) => true
+ − 1600
case SEQ(r1, r2) => breakable(r1)
+ − 1601
case _ => false
+ − 1602
}
532
+ − 1603
591
+ − 1604
def linform_test() {
+ − 1605
val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) //
+ − 1606
val r_linforms = lf(r)
+ − 1607
println(r_linforms.size)
+ − 1608
val pderuniv = pderUNIV(r)
+ − 1609
println(pderuniv.size)
+ − 1610
}
+ − 1611
// linform_test()
+ − 1612
// 1
+ − 1613
def newStrong_test() {
+ − 1614
val r2 = (CHAR('b') | ONE)
+ − 1615
val r0 = CHAR('d')
+ − 1616
val r1 = (ONE | CHAR('c'))
+ − 1617
val expRexp = (SEQ(r2, r0) | SEQ(SEQ(r1, r2), r0))
+ − 1618
println(s"original regex is: ")
+ − 1619
rprint(expRexp)
+ − 1620
val expSimp5 = strongBsimp5(internalise(expRexp))
+ − 1621
val expSimp6 = strongBsimp6(internalise(expRexp))
+ − 1622
aprint(expSimp5)
+ − 1623
aprint(expSimp6)
+ − 1624
}
+ − 1625
// newStrong_test()
+ − 1626
// SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1627
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1628
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1629
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
493
+ − 1630
+ − 1631
591
+ − 1632
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1633
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
543
+ − 1634
+ − 1635
+ − 1636
+ − 1637
+ − 1638
591
+ − 1639
def bders_bderssimp() {
543
+ − 1640
639
+ − 1641
+ − 1642
val r = //(SEQ(ALTS(ONE,CHAR('c')),
+ − 1643
SEQ(SEQ(CHAR('a'),CHAR('a')),ALTS(ONE,CHAR('c')))
591
+ − 1644
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
639
+ − 1645
val ss = Set("aa")//stringsFromRexp(r)
591
+ − 1646
val boolList = ss.map(s => {
+ − 1647
//val bdStrong = bdersStrong(s.toList, internalise(r))
639
+ − 1648
val bds = (bders(s.toList, internalise(r)))
591
+ − 1649
val bdssimp = bsimp(bders_simp(s.toList, internalise(r)))
+ − 1650
//val pdersSet = pderUNIV(r)//.flatMap(r => turnIntoTerms(r))
+ − 1651
//val pdersSetBroken = pdersSet.flatMap(r => turnIntoTerms(r))
+ − 1652
//rs1_subseteq_rs2(bdStrong6Set.toSet, distinctByacc(pdersSet.toList))
+ − 1653
//bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size ||
+ − 1654
//rs1_subseteq_rs2(bdStrong6Set.toSet, pdersSet union pdersSetBroken)//|| bdStrong6Set.size <= pdersSetBroken.size
+ − 1655
rprint(r)
639
+ − 1656
println(asize(bds))
+ − 1657
println(asize(bdssimp))
591
+ − 1658
bds == bdssimp
+ − 1659
})
+ − 1660
//!boolList.exists(b => b == false)
+ − 1661
!boolList.exists(b => b == false)
+ − 1662
}
639
+ − 1663
+ − 1664
+ − 1665
+ − 1666
bders_bderssimp()
543
+ − 1667
+ − 1668
591
+ − 1669
639
+ − 1670
// println("hi!!!!!")
+ − 1671
// counterexample_check()