403
+ − 1
// A simple lexer inspired by work of Sulzmann & Lu
+ − 2
//==================================================
+ − 3
//
+ − 4
// Call the test cases with
+ − 5
//
+ − 6
// amm lexer.sc small
+ − 7
// amm lexer.sc fib
+ − 8
// amm lexer.sc loops
+ − 9
// amm lexer.sc email
+ − 10
//
+ − 11
// amm lexer.sc all
+ − 12
+ − 13
+ − 14
// regular expressions including records
+ − 15
abstract class Rexp
+ − 16
case object ZERO extends Rexp
+ − 17
case object ONE extends Rexp
+ − 18
case object ANYCHAR extends Rexp
+ − 19
case class CHAR(c: Char) extends Rexp
+ − 20
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
+ − 21
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
+ − 22
case class STAR(r: Rexp) extends Rexp
+ − 23
case class RECD(x: String, r: Rexp) extends Rexp
+ − 24
case class NTIMES(n: Int, r: Rexp) extends Rexp
+ − 25
case class OPTIONAL(r: Rexp) extends Rexp
+ − 26
case class NOT(r: Rexp) extends Rexp
+ − 27
// records for extracting strings or tokens
+ − 28
+ − 29
// values
+ − 30
abstract class Val
+ − 31
case object Empty extends Val
+ − 32
case class Chr(c: Char) extends Val
+ − 33
case class Sequ(v1: Val, v2: Val) extends Val
+ − 34
case class Left(v: Val) extends Val
+ − 35
case class Right(v: Val) extends Val
+ − 36
case class Stars(vs: List[Val]) extends Val
+ − 37
case class Rec(x: String, v: Val) extends Val
+ − 38
case class Ntime(vs: List[Val]) extends Val
+ − 39
case class Optionall(v: Val) extends Val
+ − 40
case class Nots(s: String) extends Val
+ − 41
+ − 42
+ − 43
+ − 44
abstract class Bit
+ − 45
case object Z extends Bit
+ − 46
case object S extends Bit
+ − 47
+ − 48
+ − 49
type Bits = List[Bit]
+ − 50
+ − 51
abstract class ARexp
+ − 52
case object AZERO extends ARexp
+ − 53
case class AONE(bs: Bits) extends ARexp
+ − 54
case class ACHAR(bs: Bits, c: Char) extends ARexp
+ − 55
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
+ − 56
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
+ − 57
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
+ − 58
case class ANOT(bs: Bits, r: ARexp) extends ARexp
+ − 59
case class AANYCHAR(bs: Bits) extends ARexp
+ − 60
+ − 61
+ − 62
+ − 63
// some convenience for typing in regular expressions
+ − 64
+ − 65
def charlist2rexp(s : List[Char]): Rexp = s match {
+ − 66
case Nil => ONE
+ − 67
case c::Nil => CHAR(c)
+ − 68
case c::s => SEQ(CHAR(c), charlist2rexp(s))
+ − 69
}
+ − 70
implicit def string2rexp(s : String) : Rexp =
+ − 71
charlist2rexp(s.toList)
+ − 72
+ − 73
implicit def RexpOps(r: Rexp) = new {
+ − 74
def | (s: Rexp) = ALTS(r, s)
+ − 75
def % = STAR(r)
+ − 76
def ~ (s: Rexp) = SEQ(r, s)
+ − 77
}
+ − 78
+ − 79
implicit def stringOps(s: String) = new {
+ − 80
def | (r: Rexp) = ALTS(s, r)
+ − 81
def | (r: String) = ALTS(s, r)
+ − 82
def % = STAR(s)
+ − 83
def ~ (r: Rexp) = SEQ(s, r)
+ − 84
def ~ (r: String) = SEQ(s, r)
+ − 85
def $ (r: Rexp) = RECD(s, r)
+ − 86
}
+ − 87
+ − 88
def nullable(r: Rexp) : Boolean = r match {
+ − 89
case ZERO => false
+ − 90
case ONE => true
+ − 91
case CHAR(_) => false
+ − 92
case ANYCHAR => false
+ − 93
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
+ − 94
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+ − 95
case STAR(_) => true
+ − 96
case RECD(_, r1) => nullable(r1)
+ − 97
case NTIMES(n, r) => if (n == 0) true else nullable(r)
+ − 98
case OPTIONAL(r) => true
+ − 99
case NOT(r) => !nullable(r)
+ − 100
}
+ − 101
+ − 102
def der(c: Char, r: Rexp) : Rexp = r match {
+ − 103
case ZERO => ZERO
+ − 104
case ONE => ZERO
+ − 105
case CHAR(d) => if (c == d) ONE else ZERO
+ − 106
case ANYCHAR => ONE
+ − 107
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
+ − 108
case SEQ(r1, r2) =>
+ − 109
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
+ − 110
else SEQ(der(c, r1), r2)
+ − 111
case STAR(r) => SEQ(der(c, r), STAR(r))
+ − 112
case RECD(_, r1) => der(c, r1)
+ − 113
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
+ − 114
case OPTIONAL(r) => der(c, r)
+ − 115
case NOT(r) => NOT(der(c, r))
+ − 116
}
+ − 117
+ − 118
+ − 119
// extracts a string from a value
+ − 120
def flatten(v: Val) : String = v match {
+ − 121
case Empty => ""
+ − 122
case Chr(c) => c.toString
+ − 123
case Left(v) => flatten(v)
+ − 124
case Right(v) => flatten(v)
+ − 125
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
+ − 126
case Stars(vs) => vs.map(flatten).mkString
+ − 127
case Ntime(vs) => vs.map(flatten).mkString
+ − 128
case Optionall(v) => flatten(v)
+ − 129
case Rec(_, v) => flatten(v)
+ − 130
}
+ − 131
+ − 132
+ − 133
// extracts an environment from a value;
+ − 134
// used for tokenising a string
+ − 135
def env(v: Val) : List[(String, String)] = v match {
+ − 136
case Empty => Nil
+ − 137
case Chr(c) => Nil
+ − 138
case Left(v) => env(v)
+ − 139
case Right(v) => env(v)
+ − 140
case Sequ(v1, v2) => env(v1) ::: env(v2)
+ − 141
case Stars(vs) => vs.flatMap(env)
+ − 142
case Ntime(vs) => vs.flatMap(env)
+ − 143
case Rec(x, v) => (x, flatten(v))::env(v)
+ − 144
case Optionall(v) => env(v)
+ − 145
case Nots(s) => ("Negative", s) :: Nil
+ − 146
}
+ − 147
+ − 148
+ − 149
// The injection and mkeps part of the lexer
+ − 150
//===========================================
+ − 151
+ − 152
def mkeps(r: Rexp) : Val = r match {
+ − 153
case ONE => Empty
+ − 154
case ALTS(r1, r2) =>
+ − 155
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
+ − 156
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
+ − 157
case STAR(r) => Stars(Nil)
+ − 158
case RECD(x, r) => Rec(x, mkeps(r))
+ − 159
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
+ − 160
case OPTIONAL(r) => Optionall(Empty)
+ − 161
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
+ − 162
else Nots("")//Nots(s.reverse.toString)
+ − 163
// case NOT(ZERO) => Empty
+ − 164
// case NOT(CHAR(c)) => Empty
+ − 165
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
+ − 166
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
+ − 167
// case NOT(STAR(r)) => Stars(Nil)
+ − 168
+ − 169
}
+ − 170
+ − 171
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
+ − 172
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
+ − 173
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
+ − 174
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
+ − 175
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
+ − 176
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
+ − 177
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
+ − 178
case (CHAR(d), Empty) => Chr(c)
+ − 179
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
+ − 180
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
+ − 181
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
+ − 182
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
+ − 183
case (ANYCHAR, Empty) => Chr(c)
+ − 184
}
+ − 185
+ − 186
// some "rectification" functions for simplification
+ − 187
def F_ID(v: Val): Val = v
+ − 188
def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
+ − 189
def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
+ − 190
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 191
case Right(v) => Right(f2(v))
+ − 192
case Left(v) => Left(f1(v))
+ − 193
}
+ − 194
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
+ − 195
case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
+ − 196
}
+ − 197
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
+ − 198
(v:Val) => Sequ(f1(Empty), f2(v))
+ − 199
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
+ − 200
(v:Val) => Sequ(f1(v), f2(Empty))
+ − 201
+ − 202
def F_ERROR(v: Val): Val = throw new Exception("error")
+ − 203
+ − 204
// simplification
+ − 205
def simp(r: Rexp): (Rexp, Val => Val) = r match {
+ − 206
case ALTS(r1, r2) => {
+ − 207
val (r1s, f1s) = simp(r1)
+ − 208
val (r2s, f2s) = simp(r2)
+ − 209
(r1s, r2s) match {
+ − 210
case (ZERO, _) => (r2s, F_RIGHT(f2s))
+ − 211
case (_, ZERO) => (r1s, F_LEFT(f1s))
+ − 212
case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
+ − 213
else (ALTS (r1s, r2s), F_ALT(f1s, f2s))
+ − 214
}
+ − 215
}
+ − 216
case SEQ(r1, r2) => {
+ − 217
val (r1s, f1s) = simp(r1)
+ − 218
val (r2s, f2s) = simp(r2)
+ − 219
(r1s, r2s) match {
+ − 220
case (ZERO, _) => (ZERO, F_ERROR)
+ − 221
case (_, ZERO) => (ZERO, F_ERROR)
+ − 222
case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
+ − 223
case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
+ − 224
case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
+ − 225
}
+ − 226
}
+ − 227
case r => (r, F_ID)
+ − 228
}
+ − 229
+ − 230
// lexing functions including simplification
+ − 231
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
+ − 232
case Nil => if (nullable(r)) mkeps(r) else
+ − 233
{ throw new Exception(s"lexing error $r not nullable") }
+ − 234
case c::cs => {
+ − 235
val (r_simp, f_simp) = simp(der(c, r))
+ − 236
inj(r, c, f_simp(lex_simp(r_simp, cs)))
+ − 237
}
+ − 238
}
+ − 239
+ − 240
def lexing_simp(r: Rexp, s: String) =
+ − 241
env(lex_simp(r, s.toList))
+ − 242
+ − 243
// The Lexing Rules for the WHILE Language
+ − 244
+ − 245
def PLUS(r: Rexp) = r ~ r.%
+ − 246
+ − 247
def Range(s : List[Char]) : Rexp = s match {
+ − 248
case Nil => ZERO
+ − 249
case c::Nil => CHAR(c)
+ − 250
case c::s => ALTS(CHAR(c), Range(s))
+ − 251
}
+ − 252
def RANGE(s: String) = Range(s.toList)
+ − 253
+ − 254
val SYM = RANGE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
+ − 255
val DIGIT = RANGE("0123456789")
+ − 256
val ID = SYM ~ (SYM | DIGIT).%
+ − 257
val NUM = PLUS(DIGIT)
+ − 258
val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write"
+ − 259
val SEMI: Rexp = ";"
+ − 260
val OP: Rexp = ":=" | "=" | "-" | "+" | "*" | "!=" | "<" | ">"
+ − 261
val WHITESPACE = PLUS(" " | "\n" | "\t" | "\r")
+ − 262
val RPAREN: Rexp = "{"
+ − 263
val LPAREN: Rexp = "}"
+ − 264
val STRING: Rexp = "\"" ~ SYM.% ~ "\""
+ − 265
+ − 266
+ − 267
//ab \ a --> 1b
+ − 268
//
+ − 269
val WHILE_REGS = (("k" $ KEYWORD) |
+ − 270
("i" $ ID) |
+ − 271
("o" $ OP) |
+ − 272
("n" $ NUM) |
+ − 273
("s" $ SEMI) |
+ − 274
("str" $ STRING) |
+ − 275
("p" $ (LPAREN | RPAREN)) |
+ − 276
("w" $ WHITESPACE)).%
+ − 277
+ − 278
val NREGS = NTIMES(5, OPTIONAL(SYM))
+ − 279
val NREGS1 = ("test" $ NREGS)
+ − 280
// Two Simple While Tests
+ − 281
//========================
+ − 282
val NOTREG = "hehe" ~ NOT((ANYCHAR.%) ~ "haha" ~ (ANYCHAR.%))
+ − 283
+ − 284
+ − 285
// bnullable function: tests whether the aregular
+ − 286
// expression can recognise the empty string
+ − 287
def bnullable (r: ARexp) : Boolean = r match {
+ − 288
case AZERO => false
+ − 289
case AONE(_) => true
+ − 290
case ACHAR(_,_) => false
+ − 291
case AALTS(_, rs) => rs.exists(bnullable)
+ − 292
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
+ − 293
case ASTAR(_, _) => true
+ − 294
case ANOT(_, rn) => !bnullable(rn)
+ − 295
}
+ − 296
+ − 297
def mkepsBC(r: ARexp) : Bits = r match {
+ − 298
case AONE(bs) => bs
+ − 299
case AALTS(bs, rs) => {
+ − 300
val n = rs.indexWhere(bnullable)
+ − 301
bs ++ mkepsBC(rs(n))
+ − 302
}
+ − 303
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
+ − 304
case ASTAR(bs, r) => bs ++ List(Z)
+ − 305
case ANOT(bs, rn) => bs
+ − 306
}
+ − 307
+ − 308
+ − 309
def bder(c: Char, r: ARexp) : ARexp = r match {
+ − 310
case AZERO => AZERO
+ − 311
case AONE(_) => AZERO
+ − 312
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
+ − 313
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
+ − 314
case ASEQ(bs, r1, r2) =>
+ − 315
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
+ − 316
else ASEQ(bs, bder(c, r1), r2)
+ − 317
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
+ − 318
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
+ − 319
case AANYCHAR(bs) => AONE(bs)
+ − 320
}
+ − 321
+ − 322
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
+ − 323
case AZERO => AZERO
+ − 324
case AONE(cs) => AONE(bs ++ cs)
+ − 325
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
+ − 326
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
+ − 327
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
+ − 328
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
+ − 329
case ANOT(cs, r) => ANOT(bs ++ cs, r)
+ − 330
}
+ − 331
+ − 332
+ − 333
def internalise(r: Rexp) : ARexp = r match {
+ − 334
case ZERO => AZERO
+ − 335
case ONE => AONE(Nil)
+ − 336
case CHAR(c) => ACHAR(Nil, c)
+ − 337
//case PRED(f) => APRED(Nil, f)
+ − 338
case ALTS(r1, r2) =>
+ − 339
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
+ − 340
// case ALTS(r1::rs) => {
+ − 341
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
+ − 342
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
+ − 343
// }
+ − 344
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
+ − 345
case STAR(r) => ASTAR(Nil, internalise(r))
+ − 346
case RECD(x, r) => internalise(r)
+ − 347
case NOT(r) => ANOT(Nil, internalise(r))
+ − 348
case ANYCHAR => AANYCHAR(Nil)
+ − 349
}
+ − 350
+ − 351
+ − 352
def bsimp(r: ARexp): ARexp =
+ − 353
{
+ − 354
r match {
+ − 355
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
+ − 356
case (AZERO, _) => AZERO
+ − 357
case (_, AZERO) => AZERO
+ − 358
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 359
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 360
}
+ − 361
case AALTS(bs1, rs) => {
+ − 362
val rs_simp = rs.map(bsimp(_))
+ − 363
val flat_res = flats(rs_simp)
409
+ − 364
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
403
+ − 365
dist_res match {
+ − 366
case Nil => AZERO
+ − 367
case s :: Nil => fuse(bs1, s)
+ − 368
case rs => AALTS(bs1, rs)
+ − 369
}
+ − 370
+ − 371
}
+ − 372
case r => r
+ − 373
}
+ − 374
}
409
+ − 375
def strongBsimp(r: ARexp): ARexp =
+ − 376
{
+ − 377
r match {
+ − 378
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
+ − 379
case (AZERO, _) => AZERO
+ − 380
case (_, AZERO) => AZERO
+ − 381
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ − 382
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
+ − 383
}
+ − 384
case AALTS(bs1, rs) => {
+ − 385
val rs_simp = rs.map(strongBsimp(_))
+ − 386
val flat_res = flats(rs_simp)
412
+ − 387
val dist_res = distinctBy2(flat_res)//distinctBy(flat_res, erase)
409
+ − 388
dist_res match {
+ − 389
case Nil => AZERO
+ − 390
case s :: Nil => fuse(bs1, s)
+ − 391
case rs => AALTS(bs1, rs)
+ − 392
}
+ − 393
+ − 394
}
+ − 395
case r => r
+ − 396
}
+ − 397
}
+ − 398
403
+ − 399
def bders (s: List[Char], r: ARexp) : ARexp = s match {
+ − 400
case Nil => r
+ − 401
case c::s => bders(s, bder(c, r))
+ − 402
}
+ − 403
+ − 404
def flats(rs: List[ARexp]): List[ARexp] = rs match {
+ − 405
case Nil => Nil
+ − 406
case AZERO :: rs1 => flats(rs1)
+ − 407
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
+ − 408
case r1 :: rs2 => r1 :: flats(rs2)
+ − 409
}
+ − 410
+ − 411
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
+ − 412
case Nil => Nil
+ − 413
case (x::xs) => {
+ − 414
val res = f(x)
+ − 415
if (acc.contains(res)) distinctBy(xs, f, acc)
+ − 416
else x::distinctBy(xs, f, res::acc)
+ − 417
}
+ − 418
}
+ − 419
412
+ − 420
def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
+ − 421
case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1)
+ − 422
case Nil => Nil
+ − 423
}
+ − 424
+ − 425
+ − 426
def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ − 427
case Nil => Nil
+ − 428
case (x::xs) => {
+ − 429
val res = erase(x)
+ − 430
if(acc.contains(res))
+ − 431
distinctBy2(xs, acc)
+ − 432
else
+ − 433
x match {
+ − 434
case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ − 435
val newTerms = distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+ − 436
val rsPrime = projectFirstChild(newTerms)
+ − 437
newTerms match {
+ − 438
case Nil => distinctBy2(xs, acc)
+ − 439
case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc)
+ − 440
case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
+ − 441
}
+ − 442
case x => x::distinctBy2(xs, res::acc)
+ − 443
}
+ − 444
}
414
+ − 445
}
+ − 446
+ − 447
+ − 448
+ − 449
def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
+ − 450
case Nil => Nil
+ − 451
case (x::xs) => {
+ − 452
val res = erase(x)
+ − 453
if(acc.contains(res))
+ − 454
distinctBy3(xs, acc)
+ − 455
else
+ − 456
x match {
+ − 457
case ASEQ(bs0, AALTS(bs1, rs), r2) =>
+ − 458
val newTerms = distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
+ − 459
val rsPrime = projectFirstChild(newTerms)
+ − 460
newTerms match {
+ − 461
case Nil => distinctBy3(xs, acc)
+ − 462
case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
+ − 463
case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
+ − 464
}
+ − 465
case x => x::distinctBy3(xs, res::acc)
+ − 466
}
+ − 467
}
+ − 468
}
+ − 469
+ − 470
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
+ − 471
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
+ − 472
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
+ − 473
case _ => r::Nil
412
+ − 474
}
+ − 475
403
+ − 476
def prettyRexp(r: Rexp) : String = r match {
+ − 477
case STAR(r0) => s"${prettyRexp(r0)}*"
+ − 478
case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
+ − 479
case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}"
+ − 480
case CHAR(c) => c.toString
+ − 481
case ANYCHAR => "."
+ − 482
// case NOT(r0) => s
+ − 483
}
+ − 484
+ − 485
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
+ − 486
case (ONE, bs) => (Empty, bs)
+ − 487
case (CHAR(f), bs) => (Chr(f), bs)
+ − 488
case (ALTS(r1, r2), Z::bs1) => {
+ − 489
val (v, bs2) = decode_aux(r1, bs1)
+ − 490
(Left(v), bs2)
+ − 491
}
+ − 492
case (ALTS(r1, r2), S::bs1) => {
+ − 493
val (v, bs2) = decode_aux(r2, bs1)
+ − 494
(Right(v), bs2)
+ − 495
}
+ − 496
case (SEQ(r1, r2), bs) => {
+ − 497
val (v1, bs1) = decode_aux(r1, bs)
+ − 498
val (v2, bs2) = decode_aux(r2, bs1)
+ − 499
(Sequ(v1, v2), bs2)
+ − 500
}
+ − 501
case (STAR(r1), S::bs) => {
+ − 502
val (v, bs1) = decode_aux(r1, bs)
+ − 503
//println(v)
+ − 504
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
+ − 505
(Stars(v::vs), bs2)
+ − 506
}
+ − 507
case (STAR(_), Z::bs) => (Stars(Nil), bs)
+ − 508
case (RECD(x, r1), bs) => {
+ − 509
val (v, bs1) = decode_aux(r1, bs)
+ − 510
(Rec(x, v), bs1)
+ − 511
}
+ − 512
case (NOT(r), bs) => (Nots(prettyRexp(r)), bs)
+ − 513
}
+ − 514
+ − 515
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
+ − 516
case (v, Nil) => v
+ − 517
case _ => throw new Exception("Not decodable")
+ − 518
}
+ − 519
+ − 520
+ − 521
+ − 522
def blexSimp(r: Rexp, s: String) : List[Bit] = {
+ − 523
blex_simp(internalise(r), s.toList)
+ − 524
}
+ − 525
+ − 526
def blexing_simp(r: Rexp, s: String) : Val = {
+ − 527
val bit_code = blex_simp(internalise(r), s.toList)
+ − 528
decode(r, bit_code)
+ − 529
}
+ − 530
414
+ − 531
def strong_blexing_simp(r: Rexp, s: String) : Val = {
+ − 532
decode(r, strong_blex_simp(internalise(r), s.toList))
+ − 533
}
+ − 534
+ − 535
def strong_blex_simp(r: ARexp, s: List[Char]) :Bits = s match {
+ − 536
case Nil => {
+ − 537
if (bnullable(r)) {
+ − 538
//println(asize(r))
+ − 539
val bits = mkepsBC(r)
+ − 540
bits
+ − 541
}
+ − 542
else
+ − 543
throw new Exception("Not matched")
+ − 544
}
+ − 545
case c::cs => {
+ − 546
val der_res = bder(c,r)
+ − 547
val simp_res = strongBsimp(der_res)
+ − 548
strong_blex_simp(simp_res, cs)
+ − 549
}
+ − 550
}
+ − 551
403
+ − 552
+ − 553
+ − 554
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
+ − 555
case Nil => r
+ − 556
case c::s => bders_simp(s, bsimp(bder(c, r)))
+ − 557
}
+ − 558
+ − 559
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
+ − 560
412
+ − 561
def bders_simpS(s: List[Char], r: ARexp) : ARexp = s match {
+ − 562
case Nil => r
+ − 563
case c::s => bders_simpS(s, strongBsimp(bder(c, r)))
+ − 564
}
+ − 565
+ − 566
def bdersSimpS(s: String, r: Rexp) : ARexp = bders_simpS(s.toList, internalise(r))
403
+ − 567
+ − 568
def erase(r:ARexp): Rexp = r match{
+ − 569
case AZERO => ZERO
+ − 570
case AONE(_) => ONE
+ − 571
case ACHAR(bs, c) => CHAR(c)
+ − 572
case AALTS(bs, Nil) => ZERO
+ − 573
case AALTS(bs, a::Nil) => erase(a)
+ − 574
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
+ − 575
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
+ − 576
case ASTAR(cs, r)=> STAR(erase(r))
+ − 577
case ANOT(bs, r) => NOT(erase(r))
+ − 578
case AANYCHAR(bs) => ANYCHAR
+ − 579
}
+ − 580
+ − 581
def breakHead(r: ARexp) : List[ARexp] = r match {
+ − 582
case AALTS(bs, rs) => rs
+ − 583
case r => r::Nil
+ − 584
}
+ − 585
+ − 586
def distinctByWithAcc[B, C](xs: List[B], f: B => C,
+ − 587
acc: List[C] = Nil, accB: List[B] = Nil): (List[B], List[C]) = xs match {
+ − 588
case Nil => (accB.reverse, acc)
+ − 589
case (x::xs) => {
+ − 590
val res = f(x)
+ − 591
if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB)
+ − 592
else distinctByWithAcc(xs, f, res::acc, x::accB)
+ − 593
}
+ − 594
}
+ − 595
412
+ − 596
//(r1+r2)r + (r2'+r3)r' ~> (r1+r2)r + (r3)r' (erase r = erase r') (erase r2 = erase r2')
+ − 597
//s = s1@s2 \in L R s1 \in L r2 s2 \in L r
+ − 598
//s = s3@s4 s3 \in L r1 s4 \in L r |s3| < |s1| \nexists s3' s4' s.t. s3'@s4' = s and s3' \in L r1 s4' \in L r
+ − 599
//(r1+r4)r ~> r1r +_usedToBeSeq r4r
+ − 600
// | ss7: "erase a01 = erase a02 ∧ (distinctBy as2 erase (set (map erase as1)) = as2p) ⟹
+ − 601
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) sâ†
+ − 602
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
+ − 603
// | ss8: "erase a01 = erase a02 ∧ (distinctBy [a2] erase (set (map erase as1)) = []) ⟹
+ − 604
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs a2 (ASTAR bs02 a02)]@rsc) sâ†
+ − 605
// (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@rsc)"
+ − 606
// | ss9: "erase a01 = erase a02 ∧ (distinctBy as2 erase {erase a1} = as2p) ⟹
+ − 607
// (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) sâ†
+ − 608
// (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)"
403
+ − 609
412
+ − 610
+ − 611
//# of top-level terms
+ − 612
// r1r2 -> r11r2 + r12 -> r21r2 + r22 + r12' -> 4 terms -> 5 terms -> 6..........
+ − 613
// if string long enough --> #| r1r2 \s s | > unbounded? No -> #| r1r2 \s s | < #| pders UNIV r1| + #|pders UNIV r2| <= awidth r1r2
+ − 614
//r* -> r1r* -> r21r* + r22r* -> 4 terms -> 8 terms..............
409
+ − 615
def strongDB(xs: List[ARexp],
403
+ − 616
acc1: List[Rexp] = Nil,
+ − 617
acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match {
+ − 618
case Nil => Nil
+ − 619
case (x::xs) =>
+ − 620
if(acc1.contains(erase(x)))
409
+ − 621
strongDB(xs, acc1, acc2)
403
+ − 622
else{
+ − 623
x match {
+ − 624
case ASTAR(bs0, r0) =>
+ − 625
val headList : List[ARexp] = List[ARexp](AONE(Nil))
+ − 626
val i = acc2.indexWhere(
+ − 627
r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) }
+ − 628
)
+ − 629
if(i == -1){
409
+ − 630
x::strongDB(
403
+ − 631
xs, erase(x)::acc1, (ONE::Nil, erase(r0))::acc2
+ − 632
)
+ − 633
}
+ − 634
else{
+ − 635
val headListAlready = acc2(i)
+ − 636
val (newHeads, oldHeadsUpdated) =
+ − 637
distinctByWithAcc(headList, erase, headListAlready._1)
+ − 638
newHeads match{
+ − 639
case newHead::Nil =>
+ − 640
ASTAR(bs0, r0) ::
409
+ − 641
strongDB(xs, erase(x)::acc1,
412
+ − 642
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )
403
+ − 643
case Nil =>
409
+ − 644
strongDB(xs, erase(x)::acc1,
403
+ − 645
acc2)
+ − 646
}
+ − 647
}
+ − 648
case ASEQ(bs, r1, ASTAR(bs0, r0)) =>
+ − 649
val headList = breakHead(r1)
+ − 650
val i = acc2.indexWhere(
+ − 651
r2stl => {val (r2s, tl) = r2stl; tl == erase(r0) }
+ − 652
)
+ − 653
if(i == -1){
409
+ − 654
x::strongDB(
403
+ − 655
xs, erase(x)::acc1, (headList.map(erase(_)), erase(r0))::acc2
+ − 656
)
+ − 657
}
+ − 658
else{
+ − 659
val headListAlready = acc2(i)
+ − 660
val (newHeads, oldHeadsUpdated) =
+ − 661
distinctByWithAcc(headList, erase, headListAlready._1)
+ − 662
newHeads match{
+ − 663
case newHead::Nil =>
+ − 664
ASEQ(bs, newHead, ASTAR(bs0, r0)) ::
409
+ − 665
strongDB(xs, erase(x)::acc1,
403
+ − 666
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)) )//TODO: acc2 already contains headListAlready
+ − 667
case Nil =>
409
+ − 668
strongDB(xs, erase(x)::acc1,
403
+ − 669
acc2)
+ − 670
case hds => val AALTS(bsp, rsp) = r1
+ − 671
ASEQ(bs, AALTS(bsp, hds), ASTAR(bs0, r0)) ::
409
+ − 672
strongDB(xs, erase(x)::acc1,
403
+ − 673
acc2.updated(i, (oldHeadsUpdated, headListAlready._2)))
+ − 674
}
+ − 675
}
409
+ − 676
case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)
403
+ − 677
}
+ − 678
+ − 679
}
414
+ − 680
403
+ − 681
}
+ − 682
414
+ − 683
def allCharSeq(r: Rexp) : Boolean = r match {
+ − 684
case CHAR(c) => true
+ − 685
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
+ − 686
case _ => false
+ − 687
}
+ − 688
+ − 689
def flattenSeq(r: Rexp) : String = r match {
+ − 690
case CHAR(c) => c.toString
+ − 691
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
+ − 692
case _ => throw new Error("flatten unflattenable rexp")
+ − 693
}
+ − 694
412
+ − 695
def shortRexpOutput(r: Rexp) : String = r match {
+ − 696
case CHAR(c) => c.toString
+ − 697
case ONE => "1"
+ − 698
case ZERO => "0"
414
+ − 699
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
412
+ − 700
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
+ − 701
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
+ − 702
case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
+ − 703
//case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
+ − 704
//case RTOP => "RTOP"
+ − 705
}
+ − 706
403
+ − 707
+ − 708
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
+ − 709
case Nil => {
+ − 710
if (bnullable(r)) {
+ − 711
//println(asize(r))
+ − 712
val bits = mkepsBC(r)
+ − 713
bits
+ − 714
}
414
+ − 715
else
+ − 716
throw new Exception("Not matched")
403
+ − 717
}
+ − 718
case c::cs => {
+ − 719
val der_res = bder(c,r)
+ − 720
val simp_res = bsimp(der_res)
+ − 721
blex_simp(simp_res, cs)
+ − 722
}
+ − 723
}
+ − 724
def size(r: Rexp) : Int = r match {
+ − 725
case ZERO => 1
+ − 726
case ONE => 1
+ − 727
case CHAR(_) => 1
+ − 728
case ANYCHAR => 1
+ − 729
case NOT(r0) => 1 + size(r0)
+ − 730
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
+ − 731
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
+ − 732
case STAR(r) => 1 + size(r)
+ − 733
}
+ − 734
+ − 735
def asize(a: ARexp) = size(erase(a))
+ − 736
412
+ − 737
//pder related
+ − 738
type Mon = (Char, Rexp)
+ − 739
type Lin = Set[Mon]
+ − 740
+ − 741
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
+ − 742
case ZERO => Set()
+ − 743
case ONE => rs
+ − 744
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
+ − 745
}
+ − 746
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
+ − 747
case ZERO => Set()
+ − 748
case ONE => l
+ − 749
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
+ − 750
}
+ − 751
def lf(r: Rexp): Lin = r match {
+ − 752
case ZERO => Set()
+ − 753
case ONE => Set()
+ − 754
case CHAR(f) => {
+ − 755
//val Some(c) = alphabet.find(f)
+ − 756
Set((f, ONE))
+ − 757
}
+ − 758
case ALTS(r1, r2) => {
+ − 759
lf(r1 ) ++ lf(r2)
+ − 760
}
+ − 761
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
+ − 762
case SEQ(r1, r2) =>{
+ − 763
if (nullable(r1))
+ − 764
cir_prod(lf(r1), r2) ++ lf(r2)
+ − 765
else
+ − 766
cir_prod(lf(r1), r2)
+ − 767
}
+ − 768
}
+ − 769
def lfs(r: Set[Rexp]): Lin = {
+ − 770
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
+ − 771
}
+ − 772
+ − 773
def pder(x: Char, t: Rexp): Set[Rexp] = {
+ − 774
val lft = lf(t)
+ − 775
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
+ − 776
}
+ − 777
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
+ − 778
case x::xs => pders(xs, pder(x, t))
+ − 779
case Nil => Set(t)
+ − 780
}
+ − 781
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
+ − 782
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 783
case Nil => ts
+ − 784
}
+ − 785
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
+ − 786
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
+ − 787
//all implementation of partial derivatives that involve set union are potentially buggy
+ − 788
//because they don't include the original regular term before they are pdered.
+ − 789
//now only pderas is fixed.
+ − 790
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
414
+ − 791
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
412
+ − 792
def awidth(r: Rexp) : Int = r match {
+ − 793
case CHAR(c) => 1
+ − 794
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
+ − 795
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
+ − 796
case ONE => 0
+ − 797
case ZERO => 0
+ − 798
case STAR(r) => awidth(r)
+ − 799
}
+ − 800
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
+ − 801
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
+ − 802
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
+ − 803
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
+ − 804
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
+ − 805
case ZERO => Set[Rexp]()
+ − 806
case ONE => Set[Rexp]()
+ − 807
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
+ − 808
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
+ − 809
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
+ − 810
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
+ − 811
}
+ − 812
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
+ − 813
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
+ − 814
case Nil => ts
+ − 815
}
+ − 816
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
+ − 817
+ − 818
403
+ − 819
+ − 820
// @arg(doc = "small tests")
414
+ − 821
val STARREG = (((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%))
+ − 822
//(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%
403
+ − 823
414
+ − 824
// @main
403
+ − 825
def small() = {
+ − 826
412
+ − 827
403
+ − 828
// println(lexing_simp(NOTREG, prog0))
+ − 829
// val v = lex_simp(NOTREG, prog0.toList)
+ − 830
// println(v)
+ − 831
+ − 832
// val d = (lex_simp(NOTREG, prog0.toList))
+ − 833
// println(d)
412
+ − 834
val pderSTAR = pderUNIV(STARREG)
414
+ − 835
412
+ − 836
val refSize = pderSTAR.map(size(_)).sum
414
+ − 837
println("different partial derivative terms:")
+ − 838
pderSTAR.foreach(r => r match {
+ − 839
+ − 840
case SEQ(head, rstar) =>
+ − 841
println(shortRexpOutput(head) ++ "~STARREG")
+ − 842
case STAR(rstar) =>
+ − 843
println("STARREG")
+ − 844
+ − 845
}
+ − 846
)
+ − 847
println("the total number of terms is")
+ − 848
//println(refSize)
+ − 849
println(pderSTAR.size)
415
+ − 850
for(i <- List(1, 10, 100, 400, 800, 840, 841, 900) ){
412
+ − 851
val prog0 = "a" * i
414
+ − 852
//println(s"test: $prog0")
+ − 853
println(s"testing with $i a's" )
412
+ − 854
val bd = bdersSimp(prog0, STARREG)//DB
+ − 855
val sbd = bdersSimpS(prog0, STARREG)//strongDB
414
+ − 856
val subTerms = breakIntoTerms(erase(sbd))
+ − 857
val subTermsLarge = breakIntoTerms(erase(bd))
+ − 858
+ − 859
println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
+ − 860
println("the number of distinct subterms for bsimp with strongDB and standardDB")
+ − 861
println(subTerms.distinct.size)
+ − 862
println(subTermsLarge.distinct.size)
415
+ − 863
println("which coincides with the number of PDER terms")
403
+ − 864
412
+ − 865
+ − 866
// println(shortRexpOutput(erase(sbd)))
+ − 867
// println(shortRexpOutput(erase(bd)))
414
+ − 868
415
+ − 869
println("pdersize, original, strongSimp, bsimp")
414
+ − 870
println(refSize, size(STARREG), asize(sbd), asize(bd))
403
+ − 871
414
+ − 872
// val vres = strong_blexing_simp( STARREG, prog0)
+ − 873
// println(vres)
412
+ − 874
}
403
+ − 875
// println(vs.length)
+ − 876
// println(vs)
+ − 877
+ − 878
+ − 879
// val prog1 = """read n; write n"""
+ − 880
// println(s"test: $prog1")
+ − 881
// println(lexing_simp(WHILE_REGS, prog1))
+ − 882
}
+ − 883
414
+ − 884
small()
+ − 885
403
+ − 886
// // Bigger Tests
+ − 887
// //==============
+ − 888
+ − 889
// // escapes strings and prints them out as "", "\n" and so on
+ − 890
// def esc(raw: String): String = {
+ − 891
// import scala.reflect.runtime.universe._
+ − 892
// Literal(Constant(raw)).toString
+ − 893
// }
+ − 894
+ − 895
// def escape(tks: List[(String, String)]) =
+ − 896
// tks.map{ case (s1, s2) => (s1, esc(s2))}
+ − 897
+ − 898
+ − 899
// val prog2 = """
+ − 900
// write "Fib";
+ − 901
// read n;
+ − 902
// minus1 := 0;
+ − 903
// minus2 := 1;
+ − 904
// while n > 0 do {
+ − 905
// temp := minus2;
+ − 906
// minus2 := minus1 + minus2;
+ − 907
// minus1 := temp;
+ − 908
// n := n - 1
+ − 909
// };
+ − 910
// write "Result";
+ − 911
// write minus2
+ − 912
// """
+ − 913
+ − 914
// @arg(doc = "Fibonacci test")
+ − 915
// @main
+ − 916
// def fib() = {
+ − 917
// println("lexing fib program")
+ − 918
// println(escape(lexing_simp(WHILE_REGS, prog2)).mkString("\n"))
+ − 919
// }
+ − 920
+ − 921
+ − 922
// val prog3 = """
+ − 923
// start := 1000;
+ − 924
// x := start;
+ − 925
// y := start;
+ − 926
// z := start;
+ − 927
// while 0 < x do {
+ − 928
// while 0 < y do {
+ − 929
// while 0 < z do {
+ − 930
// z := z - 1
+ − 931
// };
+ − 932
// z := start;
+ − 933
// y := y - 1
+ − 934
// };
+ − 935
// y := start;
+ − 936
// x := x - 1
+ − 937
// }
+ − 938
// """
+ − 939
+ − 940
// @arg(doc = "Loops test")
+ − 941
// @main
+ − 942
// def loops() = {
+ − 943
// println("lexing Loops")
+ − 944
// println(escape(lexing_simp(WHILE_REGS, prog3)).mkString("\n"))
+ − 945
// }
+ − 946
+ − 947
// @arg(doc = "Email Test")
+ − 948
// @main
+ − 949
// def email() = {
+ − 950
// val lower = "abcdefghijklmnopqrstuvwxyz"
+ − 951
+ − 952
// val NAME = RECD("name", PLUS(RANGE(lower ++ "_.-")))
+ − 953
// val DOMAIN = RECD("domain", PLUS(RANGE(lower ++ "-")))
+ − 954
// val RE = RANGE(lower ++ ".")
+ − 955
// val TOPLEVEL = RECD("top", (RE ~ RE) |
+ − 956
// (RE ~ RE ~ RE) |
+ − 957
// (RE ~ RE ~ RE ~ RE) |
+ − 958
// (RE ~ RE ~ RE ~ RE ~ RE) |
+ − 959
// (RE ~ RE ~ RE ~ RE ~ RE ~ RE))
+ − 960
+ − 961
// val EMAIL = NAME ~ "@" ~ DOMAIN ~ "." ~ TOPLEVEL
+ − 962
+ − 963
// println(lexing_simp(EMAIL, "christian.urban@kcl.ac.uk"))
+ − 964
// }
+ − 965
+ − 966
+ − 967
// @arg(doc = "All tests.")
+ − 968
// @main
+ − 969
// def all() = { small(); fib() ; loops() ; email() }
+ − 970
+ − 971
+ − 972
+ − 973
+ − 974
// runs with amm2 and amm3
+ − 975
+ − 976
+ − 977