517
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
+ − 13
+ − 14
+ − 15
// regular expressions including records
+ − 16
abstract class Rexp
+ − 17
case object ZERO extends Rexp
+ − 18
case object ONE extends Rexp
+ − 19
case object ANYCHAR extends Rexp
+ − 20
case class CHAR(c: Char) extends Rexp
+ − 21
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 23
case class STAR(r: Rexp) extends Rexp
+ − 24
case class RECD(x: String, r: Rexp) extends Rexp
+ − 25
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 26
case class OPTIONAL(r: Rexp) extends Rexp
+ − 27
case class NOT(r: Rexp) extends Rexp
+ − 28
// records for extracting strings or tokens
+ − 29
+ − 30
// values
+ − 31
abstract class Val
+ − 32
case object Failure extends Val
+ − 33
case object Empty extends Val
+ − 34
case class Chr(c: Char) extends Val
+ − 35
case class Sequ(v1: Val, v2: Val) extends Val
+ − 36
case class Left(v: Val) extends Val
+ − 37
case class Right(v: Val) extends Val
+ − 38
case class Stars(vs: List[Val]) extends Val
+ − 39
case class Rec(x: String, v: Val) extends Val
+ − 40
case class Ntime(vs: List[Val]) extends Val
+ − 41
case class Optionall(v: Val) extends Val
+ − 42
case class Nots(s: String) extends Val
+ − 43
+ − 44
+ − 45
+ − 46
abstract class Bit
+ − 47
case object Z extends Bit
+ − 48
case object S extends Bit
+ − 49
+ − 50
+ − 51
type Bits = List[Bit]
+ − 52
+ − 53
abstract class ARexp
+ − 54
case object AZERO extends ARexp
+ − 55
case class AONE(bs: Bits) extends ARexp
+ − 56
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 57
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 58
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 59
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 60
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 61
case class AANYCHAR(bs: Bits) extends ARexp
+ − 62
+ − 63
import scala.util.Try
+ − 64
+ − 65
trait Generator[+T] {
+ − 66
self => // an alias for "this"
+ − 67
def generate(): T
+ − 68
+ − 69
def gen(n: Int) : List[T] =
+ − 70
if (n == 0) Nil else self.generate() :: gen(n - 1)
+ − 71
+ − 72
def map[S](f: T => S): Generator[S] = new Generator[S] {
+ − 73
def generate = f(self.generate())
+ − 74
}
+ − 75
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
+ − 76
def generate = f(self.generate()).generate()
+ − 77
}
+ − 78
+ − 79
+ − 80
}
+ − 81
+ − 82
// tests a property according to a given random generator
+ − 83
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
+ − 84
for (_ <- 0 until amount) {
+ − 85
val value = r.generate()
+ − 86
assert(pred(value), s"Test failed for: $value")
+ − 87
}
+ − 88
println(s"Test passed $amount times")
+ − 89
}
+ − 90
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
+ − 91
for (_ <- 0 until amount) {
+ − 92
val valueR = r.generate()
+ − 93
val valueS = s.generate()
+ − 94
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
+ − 95
}
+ − 96
println(s"Test passed $amount times")
+ − 97
}
+ − 98
+ − 99
// random integers
+ − 100
val integers = new Generator[Int] {
+ − 101
val rand = new java.util.Random
+ − 102
def generate() = rand.nextInt()
+ − 103
}
+ − 104
+ − 105
// random booleans
+ − 106
val booleans = integers.map(_ > 0)
+ − 107
+ − 108
// random integers in the range lo and high
+ − 109
def range(lo: Int, hi: Int): Generator[Int] =
+ − 110
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
+ − 111
+ − 112
// random characters
+ − 113
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
+ − 114
val chars = chars_range('a', 'z')
+ − 115
+ − 116
+ − 117
def oneOf[T](xs: T*): Generator[T] =
+ − 118
for (idx <- range(0, xs.length)) yield xs(idx)
+ − 119
+ − 120
def single[T](x: T) = new Generator[T] {
+ − 121
def generate() = x
+ − 122
}
+ − 123
+ − 124
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
+ − 125
for (x <- t; y <- u) yield (x, y)
+ − 126
+ − 127
// lists
+ − 128
def emptyLists = single(Nil)
+ − 129
+ − 130
def nonEmptyLists : Generator[List[Int]] =
+ − 131
for (head <- integers; tail <- lists) yield head :: tail
+ − 132
+ − 133
def lists: Generator[List[Int]] = for {
+ − 134
kind <- booleans
+ − 135
list <- if (kind) emptyLists else nonEmptyLists
+ − 136
} yield list
+ − 137
+ − 138
def char_list(len: Int): Generator[List[Char]] = {
+ − 139
if(len <= 0) single(Nil)
+ − 140
else{
+ − 141
for {
+ − 142
c <- chars_range('a', 'c')
+ − 143
tail <- char_list(len - 1)
+ − 144
} yield c :: tail
+ − 145
}
+ − 146
}
+ − 147
+ − 148
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
+ − 149
+ − 150
def sampleString(r: Rexp) : List[String] = r match {
+ − 151
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
+ − 152
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
+ − 153
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
+ − 154
case ONE => "" :: Nil
+ − 155
case ZERO => Nil
+ − 156
case CHAR(c) => c.toString :: Nil
+ − 157
+ − 158
}
+ − 159
+ − 160
def stringsFromRexp(r: Rexp) : List[String] =
+ − 161
breakIntoTerms(r).flatMap(r => sampleString(r))
+ − 162
+ − 163
+ − 164
// (simple) binary trees
+ − 165
trait Tree[T]
+ − 166
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
+ − 167
case class Leaf[T](x: T) extends Tree[T]
+ − 168
+ − 169
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
+ − 170
for (x <- t) yield Leaf(x)
+ − 171
+ − 172
def inners[T](t: Generator[T]): Generator[Inner[T]] =
+ − 173
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
+ − 174
+ − 175
def trees[T](t: Generator[T]): Generator[Tree[T]] =
+ − 176
for (kind <- range(0, 2);
+ − 177
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
+ − 178
+ − 179
// regular expressions
+ − 180
+ − 181
// generates random leaf-regexes; prefers CHAR-regexes
+ − 182
def leaf_rexp() : Generator[Rexp] =
+ − 183
for (kind <- range(0, 5);
+ − 184
c <- chars_range('a', 'd')) yield
+ − 185
kind match {
+ − 186
case 0 => ZERO
+ − 187
case 1 => ONE
+ − 188
case _ => CHAR(c)
+ − 189
}
+ − 190
+ − 191
// generates random inner regexes with maximum depth d
+ − 192
def inner_rexp(d: Int) : Generator[Rexp] =
+ − 193
for (kind <- range(0, 3);
+ − 194
l <- rexp(d);
+ − 195
r <- rexp(d))
+ − 196
yield kind match {
+ − 197
case 0 => ALTS(l, r)
+ − 198
case 1 => SEQ(l, r)
+ − 199
case 2 => STAR(r)
+ − 200
}
+ − 201
+ − 202
// generates random regexes with maximum depth d;
+ − 203
// prefers inner regexes in 2/3 of the cases
+ − 204
def rexp(d: Int = 100): Generator[Rexp] =
+ − 205
for (kind <- range(0, 3);
+ − 206
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
+ − 207
+ − 208
+ − 209
// some test functions for rexps
+ − 210
def height(r: Rexp) : Int = r match {
+ − 211
case ZERO => 1
+ − 212
case ONE => 1
+ − 213
case CHAR(_) => 1
+ − 214
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 215
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
+ − 216
case STAR(r) => 1 + height(r)
+ − 217
}
+ − 218
+ − 219
// def size(r: Rexp) : Int = r match {
+ − 220
// case ZERO => 1
+ − 221
// case ONE => 1
+ − 222
// case CHAR(_) => 1
+ − 223
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
+ − 224
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 225
// case STAR(r) => 1 + size(r)
+ − 226
// }
+ − 227
+ − 228
// randomly subtracts 1 or 2 from the STAR case
+ − 229
def size_faulty(r: Rexp) : Int = r match {
+ − 230
case ZERO => 1
+ − 231
case ONE => 1
+ − 232
case CHAR(_) => 1
+ − 233
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 234
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
+ − 235
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
+ − 236
}
+ − 237
+ − 238
+ − 239
+ − 240
// some convenience for typing in regular expressions
+ − 241
+ − 242
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 243
case Nil => ONE
+ − 244
case c::Nil => CHAR(c)
+ − 245
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 246
}
+ − 247
implicit def string2rexp(s : String) : Rexp =
+ − 248
charlist2rexp(s.toList)
+ − 249
+ − 250
implicit def RexpOps(r: Rexp) = new {
+ − 251
def | (s: Rexp) = ALTS(r, s)
+ − 252
def % = STAR(r)
+ − 253
def ~ (s: Rexp) = SEQ(r, s)
+ − 254
}
+ − 255
+ − 256
implicit def stringOps(s: String) = new {
+ − 257
def | (r: Rexp) = ALTS(s, r)
+ − 258
def | (r: String) = ALTS(s, r)
+ − 259
def % = STAR(s)
+ − 260
def ~ (r: Rexp) = SEQ(s, r)
+ − 261
def ~ (r: String) = SEQ(s, r)
+ − 262
def $ (r: Rexp) = RECD(s, r)
+ − 263
}
+ − 264
+ − 265
def nullable(r: Rexp) : Boolean = r match {
+ − 266
case ZERO => false
+ − 267
case ONE => true
+ − 268
case CHAR(_) => false
+ − 269
case ANYCHAR => false
+ − 270
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 271
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 272
case STAR(_) => true
+ − 273
case RECD(_, r1) => nullable(r1)
+ − 274
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 275
case OPTIONAL(r) => true
+ − 276
case NOT(r) => !nullable(r)
+ − 277
}
+ − 278
+ − 279
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 280
case ZERO => ZERO
+ − 281
case ONE => ZERO
+ − 282
case CHAR(d) => if (c == d) ONE else ZERO
+ − 283
case ANYCHAR => ONE
+ − 284
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 285
case SEQ(r1, r2) =>
+ − 286
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 287
else SEQ(der(c, r1), r2)
+ − 288
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 289
case RECD(_, r1) => der(c, r1)
+ − 290
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 291
case OPTIONAL(r) => der(c, r)
+ − 292
case NOT(r) => NOT(der(c, r))
+ − 293
}
+ − 294
+ − 295
+ − 296
// extracts a string from a value
+ − 297
def flatten(v: Val) : String = v match {
+ − 298
case Empty => ""
+ − 299
case Chr(c) => c.toString
+ − 300
case Left(v) => flatten(v)
+ − 301
case Right(v) => flatten(v)
+ − 302
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 303
case Stars(vs) => vs.map(flatten).mkString
+ − 304
case Ntime(vs) => vs.map(flatten).mkString
+ − 305
case Optionall(v) => flatten(v)
+ − 306
case Rec(_, v) => flatten(v)
+ − 307
}
+ − 308
+ − 309
+ − 310
// extracts an environment from a value;
+ − 311
// used for tokenising a string
+ − 312
def env(v: Val) : List[(String, String)] = v match {
+ − 313
case Empty => Nil
+ − 314
case Chr(c) => Nil
+ − 315
case Left(v) => env(v)
+ − 316
case Right(v) => env(v)
+ − 317
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 318
case Stars(vs) => vs.flatMap(env)
+ − 319
case Ntime(vs) => vs.flatMap(env)
+ − 320
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 321
case Optionall(v) => env(v)
+ − 322
case Nots(s) => ("Negative", s) :: Nil
+ − 323
}
+ − 324
+ − 325
+ − 326
// The injection and mkeps part of the lexer
+ − 327
//===========================================
+ − 328
+ − 329
def mkeps(r: Rexp) : Val = r match {
+ − 330
case ONE => Empty
+ − 331
case ALTS(r1, r2) =>
+ − 332
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 333
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 334
case STAR(r) => Stars(Nil)
+ − 335
case RECD(x, r) => Rec(x, mkeps(r))
+ − 336
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 337
case OPTIONAL(r) => Optionall(Empty)
+ − 338
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 339
else Nots("")//Nots(s.reverse.toString)
+ − 340
// case NOT(ZERO) => Empty
+ − 341
// case NOT(CHAR(c)) => Empty
+ − 342
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 343
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 344
// case NOT(STAR(r)) => Stars(Nil)
+ − 345
+ − 346
}
+ − 347
+ − 348
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 349
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 350
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 351
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 352
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 353
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 354
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 355
case (CHAR(d), Empty) => Chr(c)
+ − 356
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 357
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 358
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 359
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 360
case (ANYCHAR, Empty) => Chr(c)
+ − 361
}
+ − 362
+ − 363
// some "rectification" functions for simplification
+ − 364
+ − 365
+ − 366
+ − 367
+ − 368
// The Lexing Rules for the WHILE Language
+ − 369
+ − 370
// bnullable function: tests whether the aregular
+ − 371
// expression can recognise the empty string
+ − 372
def bnullable (r: ARexp) : Boolean = r match {
+ − 373
case AZERO => false
+ − 374
case AONE(_) => true
+ − 375
case ACHAR(_,_) => false
+ − 376
case AALTS(_, rs) => rs.exists(bnullable)
+ − 377
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 378
case ASTAR(_, _) => true
+ − 379
case ANOT(_, rn) => !bnullable(rn)
+ − 380
}
+ − 381
+ − 382
def mkepsBC(r: ARexp) : Bits = r match {
+ − 383
case AONE(bs) => bs
+ − 384
case AALTS(bs, rs) => {
+ − 385
val n = rs.indexWhere(bnullable)
+ − 386
bs ++ mkepsBC(rs(n))
+ − 387
}
+ − 388
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 389
case ASTAR(bs, r) => bs ++ List(Z)
+ − 390
case ANOT(bs, rn) => bs
+ − 391
}
+ − 392
+ − 393
+ − 394
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 395
case AZERO => AZERO
+ − 396
case AONE(_) => AZERO
+ − 397
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 398
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 399
case ASEQ(bs, r1, r2) =>
+ − 400
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 401
else ASEQ(bs, bder(c, r1), r2)
+ − 402
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 403
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 404
case AANYCHAR(bs) => AONE(bs)
+ − 405
}
+ − 406
+ − 407
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 408
case AZERO => AZERO
+ − 409
case AONE(cs) => AONE(bs ++ cs)
+ − 410
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 411
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 412
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 413
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 414
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 415
}
+ − 416
+ − 417
+ − 418
def internalise(r: Rexp) : ARexp = r match {
+ − 419
case ZERO => AZERO
+ − 420
case ONE => AONE(Nil)
+ − 421
case CHAR(c) => ACHAR(Nil, c)
+ − 422
//case PRED(f) => APRED(Nil, f)
+ − 423
case ALTS(r1, r2) =>
+ − 424
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 425
// case ALTS(r1::rs) => {
+ − 426
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 427
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 428
// }
+ − 429
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 430
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 431
case RECD(x, r) => internalise(r)
+ − 432
case NOT(r) => ANOT(Nil, internalise(r))
+ − 433
case ANYCHAR => AANYCHAR(Nil)
+ − 434
}
+ − 435
+ − 436
+ − 437
def bsimp(r: ARexp): ARexp =
+ − 438
{
+ − 439
r match {
+ − 440
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 441
case (AZERO, _) => AZERO
+ − 442
case (_, AZERO) => AZERO
+ − 443
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 444
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 445
}
+ − 446
case AALTS(bs1, rs) => {
+ − 447
val rs_simp = rs.map(bsimp(_))
+ − 448
val flat_res = flats(rs_simp)
+ − 449
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
+ − 450
dist_res match {
+ − 451
case Nil => AZERO
+ − 452
case s :: Nil => fuse(bs1, s)
+ − 453
case rs => AALTS(bs1, rs)
+ − 454
}
+ − 455
+ − 456
}
+ − 457
case r => r
+ − 458
}
+ − 459
}
+ − 460
def strongBsimp(r: ARexp): ARexp =
+ − 461
{
+ − 462
r match {
+ − 463
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 464
case (AZERO, _) => AZERO
+ − 465
case (_, AZERO) => AZERO
+ − 466
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 467
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 468
}
+ − 469
case AALTS(bs1, rs) => {
+ − 470
val rs_simp = rs.map(strongBsimp(_))
+ − 471
val flat_res = flats(rs_simp)
+ − 472
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
+ − 473
dist_res match {
+ − 474
case Nil => AZERO
+ − 475
case s :: Nil => fuse(bs1, s)
+ − 476
case rs => AALTS(bs1, rs)
+ − 477
}
+ − 478
+ − 479
}
+ − 480
case r => r
+ − 481
}
+ − 482
}
+ − 483
+ − 484
def strongBsimp5(r: ARexp): ARexp =
+ − 485
{
+ − 486
// println("was this called?")
+ − 487
r match {
+ − 488
case ASEQ(bs1, r1, r2) => (strongBsimp5(r1), strongBsimp5(r2)) match {
+ − 489
case (AZERO, _) => AZERO
+ − 490
case (_, AZERO) => AZERO
+ − 491
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 492
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 493
}
+ − 494
case AALTS(bs1, rs) => {
+ − 495
// println("alts case")
+ − 496
val rs_simp = rs.map(strongBsimp5(_))
+ − 497
val flat_res = flats(rs_simp)
+ − 498
var dist_res = distinctBy5(flat_res)//distinctBy(flat_res, erase)
+ − 499
var dist2_res = distinctBy5(dist_res)
+ − 500
while(dist_res != dist2_res){
+ − 501
dist_res = dist2_res
+ − 502
dist2_res = distinctBy5(dist_res)
+ − 503
}
+ − 504
(dist2_res) match {
+ − 505
case Nil => AZERO
+ − 506
case s :: Nil => fuse(bs1, s)
+ − 507
case rs => AALTS(bs1, rs)
+ − 508
}
+ − 509
}
+ − 510
case r => r
+ − 511
}
+ − 512
}
+ − 513
+ − 514
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 515
case Nil => r
+ − 516
case c::s => bders(s, bder(c, r))
+ − 517
}
+ − 518
+ − 519
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 520
case Nil => Nil
+ − 521
case AZERO :: rs1 => flats(rs1)
+ − 522
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 523
case r1 :: rs2 => r1 :: flats(rs2)
+ − 524
}
+ − 525
+ − 526
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 527
case Nil => Nil
+ − 528
case (x::xs) => {
+ − 529
val res = f(x)
+ − 530
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 531
else x::distinctBy(xs, f, res::acc)
+ − 532
}
+ − 533
}
+ − 534
+ − 535
+ − 536
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
+ − 537
r match {
+ − 538
case ASEQ(bs, r1, r2) =>
+ − 539
val termsTruncated = allowableTerms.collect(rt => rt match {
+ − 540
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
+ − 541
})
+ − 542
val pruned : ARexp = pruneRexp(r1, termsTruncated)
+ − 543
pruned match {
+ − 544
case AZERO => AZERO
+ − 545
case AONE(bs1) => fuse(bs ++ bs1, r2)
+ − 546
case pruned1 => ASEQ(bs, pruned1, r2)
+ − 547
}
+ − 548
+ − 549
case AALTS(bs, rs) =>
+ − 550
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
+ − 551
val rsp = rs.map(r =>
+ − 552
pruneRexp(r, allowableTerms)
+ − 553
)
+ − 554
.filter(r => r != AZERO)
+ − 555
rsp match {
+ − 556
case Nil => AZERO
+ − 557
case r1::Nil => fuse(bs, r1)
+ − 558
case rs1 => AALTS(bs, rs1)
+ − 559
}
+ − 560
case r =>
+ − 561
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
+ − 562
}
+ − 563
}
+ − 564
+ − 565
def oneSimp(r: Rexp) : Rexp = r match {
+ − 566
case SEQ(ONE, r) => r
+ − 567
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
+ − 568
case r => r//assert r != 0
+ − 569
+ − 570
}
+ − 571
+ − 572
+ − 573
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 574
case Nil => Nil
+ − 575
case x :: xs =>
+ − 576
//assert(acc.distinct == acc)
+ − 577
val erased = erase(x)
+ − 578
if(acc.contains(erased))
+ − 579
distinctBy4(xs, acc)
+ − 580
else{
+ − 581
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
+ − 582
//val xp = pruneRexp(x, addToAcc)
+ − 583
pruneRexp(x, addToAcc) match {
+ − 584
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 585
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 586
}
+ − 587
}
+ − 588
}
+ − 589
+ − 590
// fun pAKC_aux :: "arexp list ⇒ arexp ⇒ rexp ⇒ arexp"
+ − 591
// where
+ − 592
// "pAKC_aux rsa r ctx = (if (L (SEQ (erase r) ( ctx) )) ⊆ (L (erase (AALTs [] rsa))) then AZERO else
+ − 593
// case r of (ASEQ bs r1 r2) ⇒
+ − 594
// bsimp_ASEQ bs (pAKC_aux rsa r1 (SEQ (erase r2) ( ctx) )) r2 |
+ − 595
// (AALTs bs rs) ⇒
+ − 596
// bsimp_AALTs bs (flts (map (λr. pAKC_aux rsa r ( ctx) ) rs) ) |
+ − 597
// r ⇒ r
+ − 598
+ − 599
// def canonicalSeq(rs: List[Rexp], acc: Rexp) = rs match {
+ − 600
// case r::Nil => SEQ(r, acc)
+ − 601
// case Nil => acc
+ − 602
// case r1::r2::Nil => SEQ(SEQ(r1, r2), acc)
+ − 603
// }
+ − 604
+ − 605
+ − 606
//the "fake" Language interpretation: just concatenates!
+ − 607
def L(acc: Rexp, rs: List[Rexp]) : Rexp = rs match {
+ − 608
case Nil => acc
+ − 609
case r :: rs1 =>
+ − 610
// if(acc == ONE)
+ − 611
// L(r, rs1)
+ − 612
// else
+ − 613
L(SEQ(acc, r), rs1)
+ − 614
}
+ − 615
+ − 616
def rprint(r: Rexp) : Unit = println(shortRexpOutput(r))
+ − 617
def rsprint(rs: List[Rexp]) = rs.foreach(r => println(shortRexpOutput(r)))
+ − 618
def aprint(a: ARexp) = println(shortRexpOutput(erase(a)))
+ − 619
def asprint(as: List[ARexp]) = as.foreach(a => println(shortRexpOutput(erase(a))))
+ − 620
+ − 621
def pAKC(acc: List[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = {
+ − 622
// println("pakc")
+ − 623
// println(shortRexpOutput(erase(r)))
+ − 624
// println("acc")
+ − 625
// rsprint(acc)
+ − 626
// println("ctx---------")
+ − 627
// rsprint(ctx)
+ − 628
// println("ctx---------end")
+ − 629
// rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp))
+ − 630
+ − 631
if (breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains)) {//acc.flatMap(breakIntoTerms
+ − 632
AZERO
+ − 633
}
+ − 634
else{
+ − 635
r match {
+ − 636
case ASEQ(bs, r1, r2) =>
+ − 637
(pAKC(acc, r1, erase(r2) :: ctx)) match{
+ − 638
case AZERO =>
+ − 639
AZERO
+ − 640
case AONE(bs1) =>
+ − 641
fuse(bs1, r2)
+ − 642
case r1p =>
+ − 643
ASEQ(bs, r1p, r2)
+ − 644
}
+ − 645
case AALTS(bs, rs0) =>
+ − 646
// println("before pruning")
+ − 647
// println(s"ctx is ")
+ − 648
// ctx.foreach(r => println(shortRexpOutput(r)))
+ − 649
// println(s"rs0 is ")
+ − 650
// rs0.foreach(r => println(shortRexpOutput(erase(r))))
+ − 651
// println(s"acc is ")
+ − 652
// acc.foreach(r => println(shortRexpOutput(r)))
+ − 653
rs0.map(r => pAKC(acc, r, ctx)).filter(_ != AZERO) match {
+ − 654
case Nil =>
+ − 655
// println("after pruning Nil")
+ − 656
AZERO
+ − 657
case r :: Nil =>
+ − 658
// println("after pruning singleton")
+ − 659
// println(shortRexpOutput(erase(r)))
+ − 660
r
+ − 661
case rs0p =>
+ − 662
// println("after pruning non-singleton")
+ − 663
AALTS(bs, rs0p)
+ − 664
}
+ − 665
case r => r
+ − 666
}
+ − 667
}
+ − 668
}
+ − 669
+ − 670
def distinctBy5(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
+ − 671
case Nil =>
+ − 672
Nil
+ − 673
case x :: xs => {
+ − 674
val erased = erase(x)
+ − 675
if(acc.contains(erased)){
+ − 676
distinctBy5(xs, acc)
+ − 677
}
+ − 678
else{
+ − 679
val pruned = pAKC(acc, x, Nil)
+ − 680
val newTerms = breakIntoTerms(erase(pruned))
+ − 681
pruned match {
+ − 682
case AZERO =>
+ − 683
distinctBy5(xs, acc)
+ − 684
case xPrime =>
+ − 685
xPrime :: distinctBy5(xs, newTerms.map(oneSimp) ::: acc)//distinctBy5(xs, addToAcc.map(oneSimp(_)) ::: acc)
+ − 686
}
+ − 687
}
+ − 688
}
+ − 689
}
+ − 690
+ − 691
+ − 692
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 693
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 694
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ − 695
case ZERO => Nil
+ − 696
case _ => r::Nil
+ − 697
}
+ − 698
+ − 699
+ − 700
+ − 701
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 702
case (ONE, bs) => (Empty, bs)
+ − 703
case (CHAR(f), bs) => (Chr(f), bs)
+ − 704
case (ALTS(r1, r2), Z::bs1) => {
+ − 705
val (v, bs2) = decode_aux(r1, bs1)
+ − 706
(Left(v), bs2)
+ − 707
}
+ − 708
case (ALTS(r1, r2), S::bs1) => {
+ − 709
val (v, bs2) = decode_aux(r2, bs1)
+ − 710
(Right(v), bs2)
+ − 711
}
+ − 712
case (SEQ(r1, r2), bs) => {
+ − 713
val (v1, bs1) = decode_aux(r1, bs)
+ − 714
val (v2, bs2) = decode_aux(r2, bs1)
+ − 715
(Sequ(v1, v2), bs2)
+ − 716
}
+ − 717
case (STAR(r1), S::bs) => {
+ − 718
val (v, bs1) = decode_aux(r1, bs)
+ − 719
//(v)
+ − 720
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 721
(Stars(v::vs), bs2)
+ − 722
}
+ − 723
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 724
case (RECD(x, r1), bs) => {
+ − 725
val (v, bs1) = decode_aux(r1, bs)
+ − 726
(Rec(x, v), bs1)
+ − 727
}
+ − 728
case (NOT(r), bs) => (Nots(r.toString), bs)
+ − 729
}
+ − 730
+ − 731
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 732
case (v, Nil) => v
+ − 733
case _ => throw new Exception("Not decodable")
+ − 734
}
+ − 735
+ − 736
+ − 737
+ − 738
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 739
val bit_code = blex_simp(internalise(r), s.toList)
+ − 740
decode(r, bit_code)
+ − 741
}
+ − 742
def simpBlexer(r: Rexp, s: String) : Val = {
+ − 743
Try(blexing_simp(r, s)).getOrElse(Failure)
+ − 744
}
+ − 745
+ − 746
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 747
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 748
}
+ − 749
+ − 750
def strong_blexing_simp5(r: Rexp, s: String) : Val = {
+ − 751
decode(r, strong_blex_simp5(internalise(r), s.toList))
+ − 752
}
+ − 753
+ − 754
+ − 755
def strongBlexer(r: Rexp, s: String) : Val = {
+ − 756
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
+ − 757
}
+ − 758
+ − 759
def strongBlexer5(r: Rexp, s: String): Val = {
+ − 760
Try(strong_blexing_simp5(r, s)).getOrElse(Failure)
+ − 761
}
+ − 762
+ − 763
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 764
case Nil => {
+ − 765
if (bnullable(r)) {
+ − 766
//println(asize(r))
+ − 767
val bits = mkepsBC(r)
+ − 768
bits
+ − 769
}
+ − 770
else
+ − 771
throw new Exception("Not matched")
+ − 772
}
+ − 773
case c::cs => {
+ − 774
val der_res = bder(c,r)
+ − 775
val simp_res = strongBsimp(der_res)
+ − 776
strong_blex_simp(simp_res, cs)
+ − 777
}
+ − 778
}
+ − 779
+ − 780
def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match {
+ − 781
case Nil => {
+ − 782
if (bnullable(r)) {
+ − 783
//println(asize(r))
+ − 784
val bits = mkepsBC(r)
+ − 785
bits
+ − 786
}
+ − 787
else
+ − 788
throw new Exception("Not matched")
+ − 789
}
+ − 790
case c::cs => {
+ − 791
val der_res = bder(c,r)
+ − 792
val simp_res = strongBsimp5(der_res)
+ − 793
strong_blex_simp5(simp_res, cs)
+ − 794
}
+ − 795
}
+ − 796
+ − 797
+ − 798
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 799
case Nil => r
+ − 800
case c::s =>
+ − 801
//println(erase(r))
+ − 802
bders_simp(s, bsimp(bder(c, r)))
+ − 803
}
+ − 804
+ − 805
def bdersStrong5(s: List[Char], r: ARexp) : ARexp = s match {
+ − 806
case Nil => r
+ − 807
case c::s => bdersStrong5(s, strongBsimp5(bder(c, r)))
+ − 808
}
+ − 809
+ − 810
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 811
+ − 812
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
+ − 813
case Nil => r
+ − 814
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
+ − 815
}
+ − 816
+ − 817
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
+ − 818
+ − 819
def erase(r:ARexp): Rexp = r match{
+ − 820
case AZERO => ZERO
+ − 821
case AONE(_) => ONE
+ − 822
case ACHAR(bs, c) => CHAR(c)
+ − 823
case AALTS(bs, Nil) => ZERO
+ − 824
case AALTS(bs, a::Nil) => erase(a)
+ − 825
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 826
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 827
case ASTAR(cs, r)=> STAR(erase(r))
+ − 828
case ANOT(bs, r) => NOT(erase(r))
+ − 829
case AANYCHAR(bs) => ANYCHAR
+ − 830
}
+ − 831
+ − 832
+ − 833
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 834
case CHAR(c) => true
+ − 835
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 836
case _ => false
+ − 837
}
+ − 838
+ − 839
def flattenSeq(r: Rexp) : String = r match {
+ − 840
case CHAR(c) => c.toString
+ − 841
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 842
case _ => throw new Error("flatten unflattenable rexp")
+ − 843
}
+ − 844
+ − 845
def shortRexpOutput(r: Rexp) : String = r match {
+ − 846
case CHAR(c) => c.toString
+ − 847
case ONE => "1"
+ − 848
case ZERO => "0"
+ − 849
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 850
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 851
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 852
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
+ − 853
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 854
//case RTOP => "RTOP"
+ − 855
}
+ − 856
+ − 857
+ − 858
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 859
case Nil => {
+ − 860
if (bnullable(r)) {
+ − 861
val bits = mkepsBC(r)
+ − 862
bits
+ − 863
}
+ − 864
else
+ − 865
throw new Exception("Not matched")
+ − 866
}
+ − 867
case c::cs => {
+ − 868
val der_res = bder(c,r)
+ − 869
val simp_res = bsimp(der_res)
+ − 870
blex_simp(simp_res, cs)
+ − 871
}
+ − 872
}
+ − 873
+ − 874
+ − 875
+ − 876
+ − 877
def size(r: Rexp) : Int = r match {
+ − 878
case ZERO => 1
+ − 879
case ONE => 1
+ − 880
case CHAR(_) => 1
+ − 881
case ANYCHAR => 1
+ − 882
case NOT(r0) => 1 + size(r0)
+ − 883
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 884
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 885
case STAR(r) => 1 + size(r)
+ − 886
}
+ − 887
+ − 888
def asize(a: ARexp) = size(erase(a))
+ − 889
+ − 890
//pder related
+ − 891
type Mon = (Char, Rexp)
+ − 892
type Lin = Set[Mon]
+ − 893
+ − 894
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 895
case ZERO => Set()
+ − 896
case ONE => rs
+ − 897
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 898
}
+ − 899
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 900
case ZERO => Set()
+ − 901
case ONE => l
+ − 902
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 903
}
+ − 904
def lf(r: Rexp): Lin = r match {
+ − 905
case ZERO => Set()
+ − 906
case ONE => Set()
+ − 907
case CHAR(f) => {
+ − 908
//val Some(c) = alphabet.find(f)
+ − 909
Set((f, ONE))
+ − 910
}
+ − 911
case ALTS(r1, r2) => {
+ − 912
lf(r1 ) ++ lf(r2)
+ − 913
}
+ − 914
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 915
case SEQ(r1, r2) =>{
+ − 916
if (nullable(r1))
+ − 917
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 918
else
+ − 919
cir_prod(lf(r1), r2)
+ − 920
}
+ − 921
}
+ − 922
def lfs(r: Set[Rexp]): Lin = {
+ − 923
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 924
}
+ − 925
+ − 926
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 927
val lft = lf(t)
+ − 928
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 929
}
+ − 930
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 931
case x::xs => pders(xs, pder(x, t))
+ − 932
case Nil => Set(t)
+ − 933
}
+ − 934
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 935
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 936
case Nil => ts
+ − 937
}
+ − 938
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 939
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 940
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 941
//because they don't include the original regular term before they are pdered.
+ − 942
//now only pderas is fixed.
+ − 943
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
+ − 944
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
+ − 945
def awidth(r: Rexp) : Int = r match {
+ − 946
case CHAR(c) => 1
+ − 947
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 948
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 949
case ONE => 0
+ − 950
case ZERO => 0
+ − 951
case STAR(r) => awidth(r)
+ − 952
}
+ − 953
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 954
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 955
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 956
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 957
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 958
case ZERO => Set[Rexp]()
+ − 959
case ONE => Set[Rexp]()
+ − 960
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 961
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 962
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 963
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 964
}
+ − 965
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 966
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 967
case Nil => ts
+ − 968
}
+ − 969
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 970
+ − 971
+ − 972
+ − 973
def starPrint(r: Rexp) : Unit = r match {
+ − 974
+ − 975
case SEQ(head, rstar) =>
+ − 976
println(shortRexpOutput(head) ++ "~STARREG")
+ − 977
case STAR(rstar) =>
+ − 978
println("STARREG")
+ − 979
case ALTS(r1, r2) =>
+ − 980
println("(")
+ − 981
starPrint(r1)
+ − 982
println("+")
+ − 983
starPrint(r2)
+ − 984
println(")")
+ − 985
case ZERO => println("0")
+ − 986
}
+ − 987
+ − 988
// @arg(doc = "small tests")
+ − 989
def n_astar_list(d: Int) : Rexp = {
+ − 990
if(d == 0) STAR("a")
+ − 991
else ALTS(STAR("a" * d), n_astar_list(d - 1))
+ − 992
}
+ − 993
def n_astar_alts(d: Int) : Rexp = d match {
+ − 994
case 0 => n_astar_list(0)
+ − 995
case d => STAR(n_astar_list(d))
+ − 996
// case r1 :: r2 :: Nil => ALTS(r1, r2)
+ − 997
// case r1 :: Nil => r1
+ − 998
// case r :: rs => ALTS(r, n_astar_alts(rs))
+ − 999
// case Nil => throw new Error("should give at least 1 elem")
+ − 1000
}
+ − 1001
def n_astar_aux(d: Int) = {
+ − 1002
if(d == 0) n_astar_alts(0)
+ − 1003
else ALTS(n_astar_alts(d), n_astar_alts(d - 1))
+ − 1004
}
+ − 1005
+ − 1006
def n_astar(d: Int) : Rexp = STAR(n_astar_aux(d))
+ − 1007
//val STARREG = n_astar(3)
+ − 1008
// ( STAR("a") |
+ − 1009
// ("a" | "aa").% |
+ − 1010
// ( "a" | "aa" | "aaa").%
+ − 1011
// ).%
+ − 1012
//( "a" | "aa" | "aaa" | "aaaa").% |
+ − 1013
//( "a" | "aa" | "aaa" | "aaaa" | "aaaaa").%
+ − 1014
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
+ − 1015
+ − 1016
// @main
+ − 1017
+ − 1018
def lcm(list: Seq[Int]):Int=list.foldLeft(1:Int){
+ − 1019
(a, b) => b * a /
+ − 1020
Stream.iterate((a,b)){case (x,y) => (y, x%y)}.dropWhile(_._2 != 0).head._1.abs
+ − 1021
}
+ − 1022
+ − 1023
def small() = {
+ − 1024
//val pderSTAR = pderUNIV(STARREG)
+ − 1025
+ − 1026
//val refSize = pderSTAR.map(size(_)).sum
518
+ − 1027
for(n <- 1 to 12){
+ − 1028
val STARREG = n_astar_alts(n)
517
+ − 1029
val iMax = (lcm((1 to n).toList))
518
+ − 1030
for(i <- iMax to iMax ){// 100, 400, 800, 840, 841, 900
517
+ − 1031
val prog0 = "a" * i
+ − 1032
//println(s"test: $prog0")
+ − 1033
print(n)
+ − 1034
print(" ")
+ − 1035
// print(i)
+ − 1036
// print(" ")
+ − 1037
println(asize(bders_simp(prog0.toList, internalise(STARREG))))
+ − 1038
}
+ − 1039
}
+ − 1040
}
+ − 1041
+ − 1042
def generator_test() {
+ − 1043
+ − 1044
// test(rexp(7), 1000) { (r: Rexp) =>
+ − 1045
// val ss = stringsFromRexp(r)
+ − 1046
// val boolList = ss.map(s => {
+ − 1047
// val simpVal = simpBlexer(r, s)
+ − 1048
// val strongVal = strongBlexer(r, s)
+ − 1049
// // println(simpVal)
+ − 1050
// // println(strongVal)
+ − 1051
// (simpVal == strongVal) && (simpVal != None)
+ − 1052
// })
+ − 1053
// !boolList.exists(b => b == false)
+ − 1054
// }
+ − 1055
// test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) =>
+ − 1056
// val ss = stringsFromRexp(r)
+ − 1057
// val boolList = ss.map(s => {
+ − 1058
// val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1059
// val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1060
// // println(shortRexpOutput(r))
+ − 1061
// // println(s)
+ − 1062
// // println(strongVal)
+ − 1063
// // println(strongVal5)
+ − 1064
// // (strongVal == strongVal5)
+ − 1065
+ − 1066
// if(asize(bdStrong5) > asize(bdStrong)){
+ − 1067
// println(s)
+ − 1068
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1069
// println(shortRexpOutput(erase(bdStrong)))
+ − 1070
// }
+ − 1071
// asize(bdStrong5) <= asize(bdStrong)
+ − 1072
// })
+ − 1073
// !boolList.exists(b => b == false)
+ − 1074
// }
+ − 1075
//*** example where bdStrong5 has a smaller size than bdStrong
+ − 1076
// test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) =>
+ − 1077
//*** depth 5 example bdStrong5 larger size than bdStrong
+ − 1078
// test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) =>
+ − 1079
+ − 1080
+ − 1081
+ − 1082
//sanity check from Christian's request
+ − 1083
// val r = ("a" | "ab") ~ ("bc" | "c")
+ − 1084
// val a = internalise(r)
+ − 1085
// val aval = blexing_simp(r, "abc")
+ − 1086
// println(aval)
+ − 1087
+ − 1088
//sample counterexample:(depth 7)
+ − 1089
//STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c))))))))
+ − 1090
//(depth5)
+ − 1091
//STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a))))))
+ − 1092
test(rexp(4), 10000000) { (r: Rexp) =>
+ − 1093
// ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) =>
+ − 1094
val ss = stringsFromRexp(r)
+ − 1095
val boolList = ss.map(s => {
+ − 1096
val bdStrong = bdersStrong(s.toList, internalise(r))
+ − 1097
val bdStrong5 = bdersStrong5(s.toList, internalise(r))
+ − 1098
val bdFurther5 = strongBsimp5(bdStrong5)
+ − 1099
val sizeFurther5 = asize(bdFurther5)
+ − 1100
val pdersSet = pderUNIV(r)
+ − 1101
val size5 = asize(bdStrong5)
+ − 1102
val size0 = asize(bdStrong)
+ − 1103
// println(s)
+ − 1104
// println("pdersSet size")
+ − 1105
// println(pdersSet.size)
+ − 1106
// pdersSet.foreach(r => println(shortRexpOutput(r)))
+ − 1107
// println("after bdStrong5")
+ − 1108
+ − 1109
// println(shortRexpOutput(erase(bdStrong5)))
+ − 1110
// println(breakIntoTerms(erase(bdStrong5)).size)
+ − 1111
+ − 1112
// println("after bdStrong")
+ − 1113
// println(shortRexpOutput(erase(bdStrong)))
+ − 1114
// println(breakIntoTerms(erase(bdStrong)).size)
+ − 1115
// println(size5, size0, sizeFurther5)
+ − 1116
// aprint(strongBsimp5(bdStrong))
+ − 1117
// println(asize(strongBsimp5(bdStrong5)))
+ − 1118
// println(s)
+ − 1119
size5 <= size0
+ − 1120
})
+ − 1121
// println(boolList)
+ − 1122
//!boolList.exists(b => b == false)
+ − 1123
!boolList.exists(b => b == false)
+ − 1124
}
+ − 1125
+ − 1126
+ − 1127
}
+ − 1128
small()
+ − 1129
// generator_test()
+ − 1130
+ − 1131
// 1
+ − 1132
+ − 1133
// SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
+ − 1134
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
+ − 1135
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
+ − 1136
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
+ − 1137
+ − 1138
+ − 1139
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
+ − 1140
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))