--- a/Brexp.scala Fri Mar 15 12:27:12 2019 +0000
+++ b/Brexp.scala Sat Mar 16 14:14:42 2019 +0000
@@ -125,7 +125,7 @@
case rs => {BALTS(bs, rs)}
}
}
- //case BSTAR(r) => BSTAR(r)
+ case BSTAR(r) => BSTAR(strong_br_simp(r))
case r => r
}
//we want to bound the size by a function bspill s.t.
--- a/Spiral.scala Fri Mar 15 12:27:12 2019 +0000
+++ b/Spiral.scala Sat Mar 16 14:14:42 2019 +0000
@@ -29,6 +29,35 @@
val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c')
def bregx_tree(r: BRexp): Element = regx_tree(berase(r))
def regx_tree(r: Rexp): Element = aregx_tree(internalise(r))
+ def annotated_tree(r: ARexp): Element = {
+ r match {
+ case ACHAR(bs, d) => {
+ //val Some(d) = alphabet.find(f)
+ d match {
+ case '\n' => elem("\\n")
+ case '\t' => elem("\\t")
+ case ' ' => elem("space")
+ case d => elem(d.toString)
+ }
+ }
+ case AONE(bs) => {
+ elem("ONE")
+ }
+ case AZERO => {
+ elem("ZERO")
+ }
+ case ASEQ(bs, r1, r2) => {
+ binary_print("SEQ", r1, r2)
+ }
+ case AALTS(bs, rs) => {
+ //elem("Awaiting completion")
+ list_print("ALT", rs)
+ }
+ case ASTAR(bs, r) => {
+ list_print("STA", List(r))
+ }
+ }
+ }
def aregx_tree(r: ARexp): Element = {
r match {
case ACHAR(bs, d) => {
@@ -276,11 +305,11 @@
//this is for comparison between normal simp and the weakened version of simp
//normal simp will be performed on syncold
//weakend simp will be performed on old
- var syncold = brternalise(r)
+ var syncold = internalise(r)
val all_chars = s.toList
for (i <- 0 to s.length - 1){
- val syncder_res = brder(all_chars(i), syncold)
- val syncsimp_res = strong_br_simp(syncder_res)
+ val syncder_res = bder(all_chars(i), syncold)
+ val syncsimp_res = super_bsimp(syncder_res)
//see brder for detailed explanation
//just changes bit Z to S when deriving an ALTS,
//signifying that the structure has been "touched" and
@@ -289,10 +318,14 @@
val simp_res = br_simp(der_res)
val anatomy = bspill(simp_res)
//track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*)
- if(f(anatomy, pd) == false || i == 1){
- println(size(berase(syncsimp_res)))
+ if(f(List(berase(simp_res)), pd) == false ){
+ println(size(erase(syncsimp_res)))
println(size(berase(simp_res)))
println(bregx_tree(simp_res))
+ println(s)
+ println(i)
+ println(r)
+ println()
println(anatomy.map(size).sum)
println(pd.map(size).sum)
}
@@ -311,22 +344,22 @@
}
}
def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true}
- def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true}
+ def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true}
def ders_simp(r: ARexp, s: List[Char]): ARexp = {
s match {
case Nil => r
case c::cs => ders_simp(bsimp(bder(c, r)), cs)
}
}
+ val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c')))))))
+ val str_panda = "ccccb"
def check_all(){
- for(i <- 1 to 1)
- {
- val s = "bb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5)
- val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7)
- //subset_check(r, s)
- weak_sub_check(r, s, 5, size_expansion_rate)
- }
+
+ weak_sub_check(big_panda, str_panda, 6, size_expansion_rate)
+
}
+ //simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set)
+
def correctness_proof_convenient_path(){
for(i <- 1 to 1)
{
@@ -336,12 +369,18 @@
val ss = s.slice(0, j+ 1)
val nangao = ders_simp(r, ss.toList)
val easy = bsimp(bders(ss.toList, r))
- if(nangao != easy|| j == 2){
+ if(true){
println(j)
if(j == 3) println("not equal")
+ println("string")
println(ss)
+ println("original regex")
println(r)
+ println("regex after ders simp")
println(nangao)
+ println("regex after ders")
+ println(bders(ss.toList, r))
+ println("regex after ders and then a single simp")
println(easy)
}
}
@@ -349,7 +388,8 @@
}
def main(args: Array[String]) {
//check_all()
- correctness_proof_convenient_path
+ enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+ //correctness_proof_convenient_path()
}
}
--- a/lex_blex_Frankensteined.scala Fri Mar 15 12:27:12 2019 +0000
+++ b/lex_blex_Frankensteined.scala Sat Mar 16 14:14:42 2019 +0000
@@ -88,10 +88,10 @@
}
internalise(("a" | "ab") ~ ("b" | ""))
-/*
+
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
case (ONE, bs) => (Empty, bs)
- case (PRED(f), C(c)::bs) => (Chr(c), bs)
+ case (CHAR(f), C(c)::bs) => (Chr(c), bs)
case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
case (ALTS(rs), bs) => bs match {
case Z::bs1 => {
@@ -125,7 +125,7 @@
case (v, Nil) => v
case _ => throw new Exception("Not decodable")
}
-*/
+
//erase function: extracts the regx from Aregex
def erase(r:ARexp): Rexp = r match{
@@ -370,54 +370,8 @@
case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
case r1 :: rs2 => r1 :: rflats(rs2)
}
- //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
- //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
var flats_time = 0L
var dist_time = 0L
- /*
- def bsimp(r: ARexp, depth: Int): ARexp =
- {
- r match {
- case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
- case (AZERO, _) => AZERO
- case (_, AZERO) => AZERO
- case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
- case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
- }
- case AALTS(bs1, rs) => {
- depth match {
- case 0 => {
- flats(distinctBy(rs, erase)) match {
- case Nil => AZERO
- case s :: Nil => fuse(bs1, s)
- case rs => AALTS(bs1, rs)
- }
- }
- case n => {
- val rs_simp = rs.map(bsimp(_, n - 1))
- val time2 = System.nanoTime()
- val flat_res = flats(rs_simp)
- val time3 = System.nanoTime()
- val dist_res = distinctBy(flat_res, erase)
- val time4 = System.nanoTime()
- flats_time = flats_time + time3 - time2
- dist_time = dist_time + time4 - time3
- //flats_time += time3 - time2
- //dist_time += time4 - time3
- //distinctBy(flats(rs.map(bsimp)), erase) match {
- dist_res match {
- case Nil => AZERO
- case s :: Nil => fuse(bs1, s)
- case rs => AALTS(bs1, rs)
- }
- }
- }
- }
- case r => r
- }
- }
- */
- //----------------------------------------------------------------------------This bsimp is the original slow one
def bsimp(r: ARexp): ARexp = r match {
case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
@@ -428,49 +382,40 @@
}
case AALTS(bs1, rs) => {
val rs_simp = rs.map(bsimp)
- val time2 = System.nanoTime()
val flat_res = flats(rs_simp)
- val time3 = System.nanoTime()
val dist_res = distinctBy(flat_res, erase)
- val time4 = System.nanoTime()
- flats_time = flats_time + time3 - time2
- dist_time = dist_time + time4 - time3
dist_res match {
case Nil => AZERO
case s :: Nil => fuse(bs1, s)
case rs => AALTS(bs1, rs)
}
}
- case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+ //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
case r => r
}
-
- def bsimp_weakened(r: ARexp): ARexp = r match {
- case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
+ def super_bsimp(r: ARexp): ARexp = r match {
+ case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
case (AZERO, _) => AZERO
case (_, AZERO) => AZERO
case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
+ case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) )
case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
}
case AALTS(bs1, rs) => {
- val rs_simp = rs.map(bsimp_weakened)
- val time2 = System.nanoTime()
+ val rs_simp = rs.map(super_bsimp)
val flat_res = flats(rs_simp)
- val time3 = System.nanoTime()
val dist_res = distinctBy(flat_res, erase)
- val time4 = System.nanoTime()
- flats_time = flats_time + time3 - time2
- dist_time = dist_time + time4 - time3
dist_res match {
case Nil => AZERO
case s :: Nil => fuse(bs1, s)
case rs => AALTS(bs1, rs)
}
}
- case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
+ //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
case r => r
}
+
def simp_weakened(r: Rexp): Rexp = r match {
case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
case (ZERO, _) => ZERO
@@ -532,66 +477,32 @@
case Nil => {
if (bnullable(r)) {
//println(asize(r))
- val time4 = System.nanoTime()
- val bits = mkepsBC(r)
- val time5 = System.nanoTime()
- mkepsBC_time = time5 - time4
- bits
+ mkepsBC(r)
}
else throw new Exception("Not matched")
}
case c::cs => {
- val time1 = System.nanoTime()
val der_res = bder(c,r)
- val time2 = System.nanoTime()
val simp_res = bsimp(der_res)
- val time3 = System.nanoTime()
- bder_time = bder_time + time2 - time1
- bsimp_time = bsimp_time + time3 - time2
blex_simp(simp_res, cs)
}
}
+ def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
+ case Nil => {
+ if (bnullable(r)) {
+ mkepsBC(r)
+ }
+ else throw new Exception("Not matched")
+ }
+ case c::cs => {
+ super_blex_simp(super_bsimp(bder(c,r)), cs)
+ }
+ }
def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
case Nil => r
case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
}
- //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
- /*
- def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
- case Nil => {
- if (bnullable(r)) {
- //println(asize(r))
- mkepsBC(r)
- }
- else throw new Exception("Not matched")
- }
- case c::cs => {
- val der_res = bder(c,r)
- val simp_res = bsimp(der_res)
- //val simp_res2 = bsimp(simp_res)
- //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2))
- blex_simp(simp_res, cs)
- }
- }
- */
- /*
- def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
- case Nil => {
- if (nullable(r)) {
- mkeps(r)
- }
- else throw new Exception("Not matched")
- }
- case c::cs => {
- val start = System.nanoTime()
- val (r_simp, f_simp) = simp(der(c, r))
- val end = System.nanoTime()
- println((end - start)/1.0e9)
- inj(r, c, f_simp(lex_simp(r_simp, cs)))
- }
- }
- */
//size: of a Aregx for testing purposes
def size(r: Rexp) : Int = r match {
@@ -608,27 +519,11 @@
// decoding does not work yet
def blexing_simp(r: Rexp, s: String) = {
- //flats_time.clear()
- //dist_time.clear()
- flats_time = 0L
- dist_time = 0L
- bder_time = 0L
- bsimp_time = 0L
- mkepsBC_time = 0L
- val start = System.nanoTime()
val bit_code = blex_simp(internalise(r), s.toList)
- val end = System.nanoTime()
- println("total time: "+ (end - start)/1.0e9)
- println("spent on flats: " + (flats_time/(1.0e9)))
- println("spent on distinctBy: " + (dist_time/(1.0e9)))
- println("spent on bder: "+ bder_time/1.0e9)
- println("spent on bsimp: " + bsimp_time/1.0e9)
- println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
- //println(s"The length of the string ${s.length}; length of bit sequence:")
- //println((bit_code.length))
- //println(final_derivative)
- //bit_code
- //decode(r, bit_code)
+ decode(r, bit_code)
+ }
+ def super_blexing_simp(r: Rexp, s: String) = {
+ decode(r, super_blex_simp(internalise(r), s.toList))
}
@@ -830,7 +725,7 @@
//println(s"Testing ${r}")
for (s <- ss.par) yield {
val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
- val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
+ val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
if (res1 != res2) println(s"Disagree on ${r} and ${s}")
if (res1 != res2) println(s" ${res1} != ${res2}")
if (res1 != res2) Some((r, s)) else None
@@ -839,7 +734,7 @@
- //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
+
/*
def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
for (s <- ss){