// A simple lexer inspired by work of Sulzmann & Lu
//==================================================
//
// Call the test cases with
//
// amm lexer.sc small
// amm lexer.sc fib
// amm lexer.sc loops
// amm lexer.sc email
//
// amm lexer.sc all
import scala.util.Try
// regular expressions including records
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case object ANYCHAR extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALTS(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
case class RECD(x: String, r: Rexp) extends Rexp
case class NTIMES(n: Int, r: Rexp) extends Rexp
case class OPTIONAL(r: Rexp) extends Rexp
case class NOT(r: Rexp) extends Rexp
// records for extracting strings or tokens
// values
abstract class Val
case object Failure extends Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
case class Ntime(vs: List[Val]) extends Val
case class Optionall(v: Val) extends Val
case class Nots(s: String) extends Val
abstract class Bit
case object Z extends Bit
case object S extends Bit
type Bits = List[Bit]
abstract class ARexp
case object AZERO extends ARexp
case class AONE(bs: Bits) extends ARexp
case class ACHAR(bs: Bits, c: Char) extends ARexp
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
case class ANOT(bs: Bits, r: ARexp) extends ARexp
case class AANYCHAR(bs: Bits) extends ARexp
trait Generator[+T] {
self => // an alias for "this"
def generate(): T
def gen(n: Int) : List[T] =
if (n == 0) Nil else self.generate() :: gen(n - 1)
def map[S](f: T => S): Generator[S] = new Generator[S] {
def generate = f(self.generate())
}
def flatMap[S](f: T => Generator[S]): Generator[S] = new Generator[S] {
def generate = f(self.generate()).generate()
}
}
// tests a property according to a given random generator
def test[T](r: Generator[T], amount: Int = 100)(pred: T => Boolean) {
for (_ <- 0 until amount) {
val value = r.generate()
assert(pred(value), s"Test failed for: $value")
}
println(s"Test passed $amount times")
}
def test2[T, S](r: Generator[T], s: Generator[S], amount: Int = 100)(pred: (T, S) => Boolean) {
for (_ <- 0 until amount) {
val valueR = r.generate()
val valueS = s.generate()
assert(pred(valueR, valueS), s"Test failed for: $valueR, $valueS")
}
println(s"Test passed $amount times")
}
// random integers
val integers = new Generator[Int] {
val rand = new java.util.Random
def generate() = rand.nextInt()
}
// random booleans
val booleans = integers.map(_ > 0)
// random integers in the range lo and high
def range(lo: Int, hi: Int): Generator[Int] =
for (x <- integers) yield (lo + x.abs % (hi - lo)).abs
// random characters
def chars_range(lo: Char, hi: Char) = range(lo, hi).map(_.toChar)
val chars = chars_range('a', 'z')
def oneOf[T](xs: T*): Generator[T] =
for (idx <- range(0, xs.length)) yield xs(idx)
def single[T](x: T) = new Generator[T] {
def generate() = x
}
def pairs[T, U](t: Generator[T], u: Generator[U]): Generator[(T, U)] =
for (x <- t; y <- u) yield (x, y)
// lists
def emptyLists = single(Nil)
def nonEmptyLists : Generator[List[Int]] =
for (head <- integers; tail <- lists) yield head :: tail
def lists: Generator[List[Int]] = for {
kind <- booleans
list <- if (kind) emptyLists else nonEmptyLists
} yield list
def char_list(len: Int): Generator[List[Char]] = {
if(len <= 0) single(Nil)
else{
for {
c <- chars_range('a', 'd')
tail <- char_list(len - 1)
} yield c :: tail
}
}
def strings(len: Int): Generator[String] = for(cs <- char_list(len)) yield cs.toString
def sampleString(r: Rexp) : List[String] = r match {
case STAR(r) => stringsFromRexp(r).flatMap(s => List("", s, s ++ s))//only generate 0, 1, 2 reptitions
case SEQ(r1, r2) => stringsFromRexp(r1).flatMap(s1 => stringsFromRexp(r2).map(s2 => s1 ++ s2) )
case ALTS(r1, r2) => throw new Error(s" Rexp ${r} not expected: all alternatives are supposed to have been opened up")
case ONE => "" :: Nil
case ZERO => Nil
case CHAR(c) => c.toString :: Nil
}
def stringsFromRexp(r: Rexp) : List[String] =
breakIntoTerms(r).flatMap(r => sampleString(r))
// (simple) binary trees
trait Tree[T]
case class Inner[T](left: Tree[T], right: Tree[T]) extends Tree[T]
case class Leaf[T](x: T) extends Tree[T]
def leafs[T](t: Generator[T]): Generator[Leaf[T]] =
for (x <- t) yield Leaf(x)
def inners[T](t: Generator[T]): Generator[Inner[T]] =
for (l <- trees(t); r <- trees(t)) yield Inner(l, r)
def trees[T](t: Generator[T]): Generator[Tree[T]] =
for (kind <- range(0, 2);
tree <- if (kind == 0) leafs(t) else inners(t)) yield tree
// regular expressions
// generates random leaf-regexes; prefers CHAR-regexes
def leaf_rexp() : Generator[Rexp] =
for (kind <- range(0, 5);
c <- chars_range('a', 'd')) yield
kind match {
case 0 => ZERO
case 1 => ONE
case _ => CHAR(c)
}
// generates random inner regexes with maximum depth d
def inner_rexp(d: Int) : Generator[Rexp] =
for (kind <- range(0, 3);
l <- rexp(d);
r <- rexp(d))
yield kind match {
case 0 => ALTS(l, r)
case 1 => SEQ(l, r)
case 2 => STAR(r)
}
// generates random regexes with maximum depth d;
// prefers inner regexes in 2/3 of the cases
def rexp(d: Int = 100): Generator[Rexp] =
for (kind <- range(0, 3);
r <- if (d <= 0 || kind == 0) leaf_rexp() else inner_rexp(d - 1)) yield r
// some test functions for rexps
def height(r: Rexp) : Int = r match {
case ZERO => 1
case ONE => 1
case CHAR(_) => 1
case ALTS(r1, r2) => 1 + List(height(r1), height(r2)).max
case SEQ(r1, r2) => 1 + List(height(r1), height(r2)).max
case STAR(r) => 1 + height(r)
}
// def size(r: Rexp) : Int = r match {
// case ZERO => 1
// case ONE => 1
// case CHAR(_) => 1
// case ALTS(r1, r2) => 1 + size(r1) + size(r2)
// case SEQ(r1, r2) => 1 + size(r1) + size(r2)
// case STAR(r) => 1 + size(r)
// }
// randomly subtracts 1 or 2 from the STAR case
def size_faulty(r: Rexp) : Int = r match {
case ZERO => 1
case ONE => 1
case CHAR(_) => 1
case ALTS(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
case SEQ(r1, r2) => 1 + size_faulty(r1) + size_faulty(r2)
case STAR(r) => 1 + size_faulty(r) - range(0, 2).generate
}
// some convenience for typing in regular expressions
def charlist2rexp(s : List[Char]): Rexp = s match {
case Nil => ONE
case c::Nil => CHAR(c)
case c::s => SEQ(CHAR(c), charlist2rexp(s))
}
implicit def string2rexp(s : String) : Rexp =
charlist2rexp(s.toList)
implicit def RexpOps(r: Rexp) = new {
def | (s: Rexp) = ALTS(r, s)
def % = STAR(r)
def ~ (s: Rexp) = SEQ(r, s)
}
implicit def stringOps(s: String) = new {
def | (r: Rexp) = ALTS(s, r)
def | (r: String) = ALTS(s, r)
def % = STAR(s)
def ~ (r: Rexp) = SEQ(s, r)
def ~ (r: String) = SEQ(s, r)
def $ (r: Rexp) = RECD(s, r)
}
def nullable(r: Rexp) : Boolean = r match {
case ZERO => false
case ONE => true
case CHAR(_) => false
case ANYCHAR => false
case ALTS(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
case STAR(_) => true
case RECD(_, r1) => nullable(r1)
case NTIMES(n, r) => if (n == 0) true else nullable(r)
case OPTIONAL(r) => true
case NOT(r) => !nullable(r)
}
def der(c: Char, r: Rexp) : Rexp = r match {
case ZERO => ZERO
case ONE => ZERO
case CHAR(d) => if (c == d) ONE else ZERO
case ANYCHAR => ONE
case ALTS(r1, r2) => ALTS(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALTS(SEQ(der(c, r1), r2), der(c, r2))
else SEQ(der(c, r1), r2)
case STAR(r) => SEQ(der(c, r), STAR(r))
case RECD(_, r1) => der(c, r1)
case NTIMES(n, r) => if(n > 0) SEQ(der(c, r), NTIMES(n - 1, r)) else ZERO
case OPTIONAL(r) => der(c, r)
case NOT(r) => NOT(der(c, r))
}
// extracts a string from a value
def flatten(v: Val) : String = v match {
case Empty => ""
case Chr(c) => c.toString
case Left(v) => flatten(v)
case Right(v) => flatten(v)
case Sequ(v1, v2) => flatten(v1) ++ flatten(v2)
case Stars(vs) => vs.map(flatten).mkString
case Ntime(vs) => vs.map(flatten).mkString
case Optionall(v) => flatten(v)
case Rec(_, v) => flatten(v)
}
// extracts an environment from a value;
// used for tokenising a string
def env(v: Val) : List[(String, String)] = v match {
case Empty => Nil
case Chr(c) => Nil
case Left(v) => env(v)
case Right(v) => env(v)
case Sequ(v1, v2) => env(v1) ::: env(v2)
case Stars(vs) => vs.flatMap(env)
case Ntime(vs) => vs.flatMap(env)
case Rec(x, v) => (x, flatten(v))::env(v)
case Optionall(v) => env(v)
case Nots(s) => ("Negative", s) :: Nil
}
// The injection and mkeps part of the lexer
//===========================================
def mkeps(r: Rexp) : Val = r match {
case ONE => Empty
case ALTS(r1, r2) =>
if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
case STAR(r) => Stars(Nil)
case RECD(x, r) => Rec(x, mkeps(r))
case NTIMES(n, r) => Ntime(List.fill(n)(mkeps(r)))
case OPTIONAL(r) => Optionall(Empty)
case NOT(rInner) => if(nullable(rInner)) throw new Exception("error")
else Nots("")//Nots(s.reverse.toString)
// case NOT(ZERO) => Empty
// case NOT(CHAR(c)) => Empty
// case NOT(SEQ(r1, r2)) => Sequ(mkeps(NOT(r1)), mkeps(NOT(r2)))
// case NOT(ALTS(r1, r2)) => if(!nullable(r1)) Left(mkeps(NOT(r1))) else Right(mkeps(NOT(r2)))
// case NOT(STAR(r)) => Stars(Nil)
}
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
case (ALTS(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
case (ALTS(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
case (CHAR(d), Empty) => Chr(c)
case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
case (NTIMES(n, r), Sequ(v1, Ntime(vs))) => Ntime(inj(r, c, v1)::vs)
case (OPTIONAL(r), v) => Optionall(inj(r, c, v))
case (NOT(r), Nots(s)) => Nots(c.toString ++ s)
case (ANYCHAR, Empty) => Chr(c)
}
// some "rectification" functions for simplification
// The Lexing Rules for the WHILE Language
// bnullable function: tests whether the aregular
// expression can recognise the empty string
def bnullable (r: ARexp) : Boolean = r match {
case AZERO => false
case AONE(_) => true
case ACHAR(_,_) => false
case AALTS(_, rs) => rs.exists(bnullable)
case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
case ASTAR(_, _) => true
case ANOT(_, rn) => !bnullable(rn)
}
def mkepsBC(r: ARexp) : Bits = r match {
case AONE(bs) => bs
case AALTS(bs, rs) => {
val n = rs.indexWhere(bnullable)
bs ++ mkepsBC(rs(n))
}
case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
case ASTAR(bs, r) => bs ++ List(Z)
case ANOT(bs, rn) => bs
}
def bder(c: Char, r: ARexp) : ARexp = r match {
case AZERO => AZERO
case AONE(_) => AZERO
case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
case ASEQ(bs, r1, r2) =>
if (bnullable(r1)) AALTS(bs, ASEQ(Nil, bder(c, r1), r2) :: fuse(mkepsBC(r1), bder(c, r2)) :: Nil )
else ASEQ(bs, bder(c, r1), r2)
case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
case ANOT(bs, rn) => ANOT(bs, bder(c, rn))
case AANYCHAR(bs) => AONE(bs)
}
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
case AZERO => AZERO
case AONE(cs) => AONE(bs ++ cs)
case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
case ANOT(cs, r) => ANOT(bs ++ cs, r)
}
def internalise(r: Rexp) : ARexp = r match {
case ZERO => AZERO
case ONE => AONE(Nil)
case CHAR(c) => ACHAR(Nil, c)
//case PRED(f) => APRED(Nil, f)
case ALTS(r1, r2) =>
AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
// case ALTS(r1::rs) => {
// val AALTS(Nil, rs2) = internalise(ALTS(rs))
// AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
// }
case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
case STAR(r) => ASTAR(Nil, internalise(r))
case RECD(x, r) => internalise(r)
case NOT(r) => ANOT(Nil, internalise(r))
case ANYCHAR => AANYCHAR(Nil)
}
def bsimp(r: ARexp): ARexp =
{
r match {
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
case (AZERO, _) => AZERO
case (_, AZERO) => AZERO
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
}
case AALTS(bs1, rs) => {
val rs_simp = rs.map(bsimp(_))
val flat_res = flats(rs_simp)
val dist_res = distinctBy(flat_res, erase)//strongDB(flat_res)//distinctBy(flat_res, erase)
dist_res match {
case Nil => AZERO
case s :: Nil => fuse(bs1, s)
case rs => AALTS(bs1, rs)
}
}
case r => r
}
}
def strongBsimp(r: ARexp): ARexp =
{
r match {
case ASEQ(bs1, r1, r2) => (strongBsimp(r1), strongBsimp(r2)) match {
case (AZERO, _) => AZERO
case (_, AZERO) => AZERO
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
}
case AALTS(bs1, rs) => {
val rs_simp = rs.map(strongBsimp(_))
val flat_res = flats(rs_simp)
val dist_res = distinctBy4(flat_res)//distinctBy(flat_res, erase)
dist_res match {
case Nil => AZERO
case s :: Nil => fuse(bs1, s)
case rs => AALTS(bs1, rs)
}
}
case r => r
}
}
def bders (s: List[Char], r: ARexp) : ARexp = s match {
case Nil => r
case c::s => bders(s, bder(c, r))
}
def flats(rs: List[ARexp]): List[ARexp] = rs match {
case Nil => Nil
case AZERO :: rs1 => flats(rs1)
case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
case r1 :: rs2 => r1 :: flats(rs2)
}
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
case Nil => Nil
case (x::xs) => {
val res = f(x)
if (acc.contains(res)) distinctBy(xs, f, acc)
else x::distinctBy(xs, f, res::acc)
}
}
def pruneRexp(r: ARexp, allowableTerms: List[Rexp]) : ARexp = {
r match {
case ASEQ(bs, r1, r2) =>
val termsTruncated = allowableTerms.collect(rt => rt match {
case SEQ(r1p, r2p) if(r2p == erase(r2)) => r1p//if(r2p == erase(r2))
})
val pruned : ARexp = pruneRexp(r1, termsTruncated)
pruned match {
case AZERO => AZERO
case AONE(bs1) => fuse(bs ++ bs1, r2)
case pruned1 => ASEQ(bs, pruned1, r2)
}
case AALTS(bs, rs) =>
//allowableTerms.foreach(a => println(shortRexpOutput(a)))
val rsp = rs.map(r =>
pruneRexp(r, allowableTerms)
)
.filter(r => r != AZERO)
rsp match {
case Nil => AZERO
case r1::Nil => fuse(bs, r1)
case rs1 => AALTS(bs, rs1)
}
case r =>
if(allowableTerms.contains(erase(r))) r else AZERO //assert(r != AZERO)
}
}
def oneSimp(r: Rexp) : Rexp = r match {
case SEQ(ONE, r) => r
case SEQ(r1, r2) => SEQ(oneSimp(r1), r2)
case r => r//assert r != 0
}
def distinctBy4(xs: List[ARexp], acc: List[Rexp] = Nil) : List[ARexp] = xs match {
case Nil => Nil
case x :: xs =>
//assert(acc.distinct == acc)
val erased = erase(x)
if(acc.contains(erased))
distinctBy4(xs, acc)
else{
val addToAcc = breakIntoTerms(erased).filter(r => !acc.contains(oneSimp(r)))
//val xp = pruneRexp(x, addToAcc)
pruneRexp(x, addToAcc) match {
case AZERO => distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
case xPrime => xPrime :: distinctBy4(xs, addToAcc.map(oneSimp(_)) ::: acc)
}
}
}
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
case _ => r::Nil
}
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
case (ONE, bs) => (Empty, bs)
case (CHAR(f), bs) => (Chr(f), bs)
case (ALTS(r1, r2), Z::bs1) => {
val (v, bs2) = decode_aux(r1, bs1)
(Left(v), bs2)
}
case (ALTS(r1, r2), S::bs1) => {
val (v, bs2) = decode_aux(r2, bs1)
(Right(v), bs2)
}
case (SEQ(r1, r2), bs) => {
val (v1, bs1) = decode_aux(r1, bs)
val (v2, bs2) = decode_aux(r2, bs1)
(Sequ(v1, v2), bs2)
}
case (STAR(r1), S::bs) => {
val (v, bs1) = decode_aux(r1, bs)
//println(v)
val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
(Stars(v::vs), bs2)
}
case (STAR(_), Z::bs) => (Stars(Nil), bs)
case (RECD(x, r1), bs) => {
val (v, bs1) = decode_aux(r1, bs)
(Rec(x, v), bs1)
}
case (NOT(r), bs) => (Nots(r.toString), bs)
}
def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
case (v, Nil) => v
case _ => throw new Exception("Not decodable")
}
def blexing_simp(r: Rexp, s: String) : Val = {
val bit_code = blex_simp(internalise(r), s.toList)
decode(r, bit_code)
}
def simpBlexer(r: Rexp, s: String) : Val = {
Try(blexing_simp(r, s)).getOrElse(Failure)
}
def strong_blexing_simp(r: Rexp, s: String) : Val = {
decode(r, strong_blex_simp(internalise(r), s.toList))
}
def strongBlexer(r: Rexp, s: String) : Val = {
Try(strong_blexing_simp(r, s)).getOrElse(Failure)
}
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
case Nil => {
if (bnullable(r)) {
//println(asize(r))
val bits = mkepsBC(r)
bits
}
else
throw new Exception("Not matched")
}
case c::cs => {
val der_res = bder(c,r)
val simp_res = strongBsimp(der_res)
strong_blex_simp(simp_res, cs)
}
}
def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
case Nil => r
case c::s =>
println(erase(r))
bders_simp(s, bsimp(bder(c, r)))
}
def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r))
def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match {
case Nil => r
case c::s => bdersStrong(s, strongBsimp(bder(c, r)))
}
def bdersStrongRexp(s: String, r: Rexp) : ARexp = bdersStrong(s.toList, internalise(r))
def erase(r:ARexp): Rexp = r match{
case AZERO => ZERO
case AONE(_) => ONE
case ACHAR(bs, c) => CHAR(c)
case AALTS(bs, Nil) => ZERO
case AALTS(bs, a::Nil) => erase(a)
case AALTS(bs, a::as) => ALTS(erase(a), erase(AALTS(bs, as)))
case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
case ASTAR(cs, r)=> STAR(erase(r))
case ANOT(bs, r) => NOT(erase(r))
case AANYCHAR(bs) => ANYCHAR
}
def allCharSeq(r: Rexp) : Boolean = r match {
case CHAR(c) => true
case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
case _ => false
}
def flattenSeq(r: Rexp) : String = r match {
case CHAR(c) => c.toString
case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
case _ => throw new Error("flatten unflattenable rexp")
}
def shortRexpOutput(r: Rexp) : String = r match {
case CHAR(c) => c.toString
case ONE => "1"
case ZERO => "0"
case SEQ(r1, r2) if(allCharSeq(r)) => flattenSeq(r)//"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
case STAR(STAR(r)) => "(..)*"// ++ shortRexpOutput(r) ++ "]*"
case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
//case RTOP => "RTOP"
}
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
case Nil => {
if (bnullable(r)) {
val bits = mkepsBC(r)
bits
}
else
throw new Exception("Not matched")
}
case c::cs => {
val der_res = bder(c,r)
val simp_res = bsimp(der_res)
blex_simp(simp_res, cs)
}
}
def size(r: Rexp) : Int = r match {
case ZERO => 1
case ONE => 1
case CHAR(_) => 1
case ANYCHAR => 1
case NOT(r0) => 1 + size(r0)
case SEQ(r1, r2) => 1 + size(r1) + size(r2)
case ALTS(r1, r2) => 1 + List(r1, r2).map(size).sum
case STAR(r) => 1 + size(r)
}
def asize(a: ARexp) = size(erase(a))
//pder related
type Mon = (Char, Rexp)
type Lin = Set[Mon]
def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match {
case ZERO => Set()
case ONE => rs
case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) )
}
def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms
case ZERO => Set()
case ONE => l
case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } )
}
def lf(r: Rexp): Lin = r match {
case ZERO => Set()
case ONE => Set()
case CHAR(f) => {
//val Some(c) = alphabet.find(f)
Set((f, ONE))
}
case ALTS(r1, r2) => {
lf(r1 ) ++ lf(r2)
}
case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later......
case SEQ(r1, r2) =>{
if (nullable(r1))
cir_prod(lf(r1), r2) ++ lf(r2)
else
cir_prod(lf(r1), r2)
}
}
def lfs(r: Set[Rexp]): Lin = {
r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r))
}
def pder(x: Char, t: Rexp): Set[Rexp] = {
val lft = lf(t)
(lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2)
}
def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match {
case x::xs => pders(xs, pder(x, t))
case Nil => Set(t)
}
def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match {
case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
case Nil => ts
}
def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc )
def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
//all implementation of partial derivatives that involve set union are potentially buggy
//because they don't include the original regular term before they are pdered.
//now only pderas is fixed.
def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders.
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
def awidth(r: Rexp) : Int = r match {
case CHAR(c) => 1
case SEQ(r1, r2) => awidth(r1) + awidth(r2)
case ALTS(r1, r2) => awidth(r1) + awidth(r2)
case ONE => 0
case ZERO => 0
case STAR(r) => awidth(r)
}
//def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList)
//def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList)
def o(r: Rexp) = if (nullable(r)) ONE else ZERO
//def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) ))
def pdp(x: Char, r: Rexp) : Set[Rexp] = r match {
case ZERO => Set[Rexp]()
case ONE => Set[Rexp]()
case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]()
case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2)
case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1)))
case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b))
}
def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match {
case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t)))
case Nil => ts
}
def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)
def starPrint(r: Rexp) : Unit = r match {
case SEQ(head, rstar) =>
println(shortRexpOutput(head) ++ "~STARREG")
case STAR(rstar) =>
println("STARREG")
case ALTS(r1, r2) =>
println("(")
starPrint(r1)
println("+")
starPrint(r2)
println(")")
case ZERO => println("0")
}
// @arg(doc = "small tests")
val STARREG = //((( (STAR("aa")) | STAR("aaa") ).%).%)
(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa"| ( STAR("aaaaaaa")) | STAR("aaaaaaaaaaa"))).%).%).%
// @main
def small() = {
// println(lexing_simp(NOTREG, prog0))
// val v = lex_simp(NOTREG, prog0.toList)
// println(v)
// val d = (lex_simp(NOTREG, prog0.toList))
// println(d)
val pderSTAR = pderUNIV(STARREG)
val refSize = pderSTAR.map(size(_)).sum
// println("different partial derivative terms:")
// pderSTAR.foreach(r => r match {
// case SEQ(head, rstar) =>
// println(shortRexpOutput(head) ++ "~STARREG")
// case STAR(rstar) =>
// println("STARREG")
// }
// )
// println("the total number of terms is")
// //println(refSize)
// println(pderSTAR.size)
// val A : Rexp= ("c" | (ONE | "b") ~ "d") ~((ONE).%)
// val B : Rexp = ((ONE).%)
// val C : Rexp = ("d") ~ ((ONE).%)
// val PRUNE_REG : Rexp = (C | B | A)
// val APRUNE_REG = internalise(PRUNE_REG)
// val program_solution = pruneRexp(APRUNE_REG, breakIntoTerms(PRUNE_REG))
// println("program executes and gives: as disired!")
// println(shortRexpOutput(erase(program_solution)))
// val simpedPruneReg = strongBsimp(APRUNE_REG)
// println(shortRexpOutput(erase(simpedPruneReg)))
for(i <- List(1,2,3,4,10, 100, 400, 841, 900 ) ){// 100, 400, 800, 840, 841, 900
val prog0 = "a" * i
//println(s"test: $prog0")
println(s"testing with $i a's" )
//val bd = bdersSimp(prog0, STARREG)//DB
val sbd = bdersStrongRexp(prog0, STARREG)//strongDB
starPrint(erase(sbd))
val subTerms = breakIntoTerms(erase(sbd))
//val subTermsLarge = breakIntoTerms(erase(bd))
println(s"subterms of regex with strongDB: ${subTerms.length}")//, standard DB: ${subTermsLarge.length}")
println("the number of distinct subterms for bsimp with strongDB")
println(subTerms.distinct.size)
//println(subTermsLarge.distinct.size)
if(pderSTAR.size > subTerms.length)
println(s"which is less than or equal to the number of PDER terms: ${pderSTAR.size}")
// println(shortRexpOutput(erase(sbd)))
// println(shortRexpOutput(erase(bd)))
println("pdersize, original, strongSimp")
println(refSize, size(STARREG), asize(sbd))
//val vres = strong_blexing_simp( STARREG, prog0)
//println(vres)
}
// println(vs.length)
// println(vs)
// val prog1 = """read n; write n"""
// println(s"test: $prog1")
// println(lexing_simp(WHILE_REGS, prog1))
// val display = ("a"| "ab").%
// val adisplay = internalise(display)
// bders_simp( "aaaaa".toList, adisplay)
}
def generator_test() {
// println("XXX generates 10 random integers in the range 0..2")
// println(range(0, 3).gen(10).mkString("\n"))
// println("XXX gnerates random lists and trees")
// println(lists.generate())
// println(trees(chars).generate())
// println("XXX generates 10 random characters")
// println(chars.gen(10).mkString("\n"))
// println("XXX generates 10 random leaf-regexes")
// println(leaf_rexp().gen(10).mkString("\n"))
// println("XXX generates 1000 regexes of maximal 10 height")
// println(rexp(10).gen(1000).mkString("\n"))
// println("XXX generates 1000 regexes and tests an always true-test")
// test(rexp(10), 1000) { _ => true }
// println("XXX generates regexes and tests a valid predicate")
// test(rexp(10), 1000) { r => height(r) <= size(r) }
// println("XXX faulty test")
// test(rexp(10), 100) { r => height(r) <= size_faulty(r) }
/* For inspection of counterexamples should they arise*/
// println("testing strongbsimp against bsimp")
// val r = SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
// SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
// STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
// SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
// val ss = stringsFromRexp(r)
// val boolList = ss.map(s => {
// val simpVal = simpBlexer(r, s)
// val strongVal = strongBlexer(r, s)
// println(simpVal)
// println(strongVal)
// (simpVal == strongVal) && (simpVal != None)
// })
// println(boolList)
// println(boolList.exists(b => b == false))
test(rexp(9), 100000) { (r: Rexp) =>
val ss = stringsFromRexp(r)
val boolList = ss.map(s => {
val simpVal = simpBlexer(r, s)
val strongVal = strongBlexer(r, s)
// println(simpVal)
// println(strongVal)
(simpVal == strongVal) && (simpVal != None)
})
!boolList.exists(b => b == false)
}
}
// small()
generator_test()
1
SEQ(SEQ(SEQ(ONE,CHAR('b')),STAR(CHAR('b'))),
SEQ(ALTS(ALTS(ZERO,STAR(CHAR('b'))),
STAR(ALTS(CHAR('a'),SEQ(SEQ(STAR(ALTS(STAR(CHAR('c')),CHAR('a'))),
SEQ(CHAR('a'),SEQ(ALTS(CHAR('b'),ZERO),SEQ(ONE,CHAR('b'))))),ONE)))),ONE))
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))
// Sequ(Sequ(Sequ(Empty,Chr(b)),Stars(List(Chr(b), Chr(b)))),Sequ(Right(Stars(List(Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty)), Right(Sequ(Sequ(Stars(List(Right(Chr(a)), Right(Chr(a)))),Sequ(Chr(a),Sequ(Left(Chr(b)),Sequ(Empty,Chr(b))))),Empty))))),Empty))