# HG changeset patch # User Chengsong # Date 1552745682 0 # Node ID 622ddbb1223a52417a548d11e81a0249955806c5 # Parent 7a349fe58bf400eeff2c19a195435be61b92842c correctness test with enumeration diff -r 7a349fe58bf4 -r 622ddbb1223a Brexp.scala --- a/Brexp.scala Fri Mar 15 12:27:12 2019 +0000 +++ b/Brexp.scala Sat Mar 16 14:14:42 2019 +0000 @@ -125,7 +125,7 @@ case rs => {BALTS(bs, rs)} } } - //case BSTAR(r) => BSTAR(r) + case BSTAR(r) => BSTAR(strong_br_simp(r)) case r => r } //we want to bound the size by a function bspill s.t. diff -r 7a349fe58bf4 -r 622ddbb1223a Spiral.scala --- a/Spiral.scala Fri Mar 15 12:27:12 2019 +0000 +++ b/Spiral.scala Sat Mar 16 14:14:42 2019 +0000 @@ -29,6 +29,35 @@ val alphabet = ("""abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.:"=()\;-+*!<>\/%{} """+"\n\t").toSet//Set('a','b','c') def bregx_tree(r: BRexp): Element = regx_tree(berase(r)) def regx_tree(r: Rexp): Element = aregx_tree(internalise(r)) + def annotated_tree(r: ARexp): Element = { + r match { + case ACHAR(bs, d) => { + //val Some(d) = alphabet.find(f) + d match { + case '\n' => elem("\\n") + case '\t' => elem("\\t") + case ' ' => elem("space") + case d => elem(d.toString) + } + } + case AONE(bs) => { + elem("ONE") + } + case AZERO => { + elem("ZERO") + } + case ASEQ(bs, r1, r2) => { + binary_print("SEQ", r1, r2) + } + case AALTS(bs, rs) => { + //elem("Awaiting completion") + list_print("ALT", rs) + } + case ASTAR(bs, r) => { + list_print("STA", List(r)) + } + } + } def aregx_tree(r: ARexp): Element = { r match { case ACHAR(bs, d) => { @@ -276,11 +305,11 @@ //this is for comparison between normal simp and the weakened version of simp //normal simp will be performed on syncold //weakend simp will be performed on old - var syncold = brternalise(r) + var syncold = internalise(r) val all_chars = s.toList for (i <- 0 to s.length - 1){ - val syncder_res = brder(all_chars(i), syncold) - val syncsimp_res = strong_br_simp(syncder_res) + val syncder_res = bder(all_chars(i), syncold) + val syncsimp_res = super_bsimp(syncder_res) //see brder for detailed explanation //just changes bit Z to S when deriving an ALTS, //signifying that the structure has been "touched" and @@ -289,10 +318,14 @@ val simp_res = br_simp(der_res) val anatomy = bspill(simp_res) //track if the number of regular expressions exceeds those in the PD set(remember PD means the pders over A*) - if(f(anatomy, pd) == false || i == 1){ - println(size(berase(syncsimp_res))) + if(f(List(berase(simp_res)), pd) == false ){ + println(size(erase(syncsimp_res))) println(size(berase(simp_res))) println(bregx_tree(simp_res)) + println(s) + println(i) + println(r) + println() println(anatomy.map(size).sum) println(pd.map(size).sum) } @@ -311,22 +344,22 @@ } } def size_comp(anatomy: List[Rexp], pd: Set[Rexp]):Boolean = {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); true} - def size_expansion_rate(anatomy: List[Rexp], pd: Set[Rexp]): Boolean = if(anatomy.size > (pd.size)*2 ) {println("size of PD and bspilled simp regx: ", pd.size, anatomy.size); inclusion_truth(anatomy, pd); false }else {true} + def size_expansion_rate(r: List[Rexp], pd: Set[Rexp]): Boolean = if(size(r(0)) > (pd.map(size).sum) * 3 ) { false }else {true} def ders_simp(r: ARexp, s: List[Char]): ARexp = { s match { case Nil => r case c::cs => ders_simp(bsimp(bder(c, r)), cs) } } + val big_panda = STAR(STAR(STAR(ALTS(List(ALTS(List(CHAR('c'), CHAR('b'))), SEQ(CHAR('c'),CHAR('c'))))))) + val str_panda = "ccccb" def check_all(){ - for(i <- 1 to 1) - { - val s = "bb"//rd_string_gen(alphabet_size, 5)//"ac"//rd_string_gen(alphabet_size, 5) - val r = STAR(STAR(ALTS(List(SEQ(CHAR('b'),CHAR('b')), ALTS(List(CHAR('a'), CHAR('b')))))))//balanced_struct_gen(4)//SEQ(ALTS(List(STAR("a"),ALTS(List("a","c")))),SEQ(ALTS(List("c","a")),ALTS(List("c","b")))) //random_struct_gen(7) - //subset_check(r, s) - weak_sub_check(r, s, 5, size_expansion_rate) - } + + weak_sub_check(big_panda, str_panda, 6, size_expansion_rate) + } + //simplified regex size 291, so called pd_simp size 70 (did not do simplification to terms in the PD set) + def correctness_proof_convenient_path(){ for(i <- 1 to 1) { @@ -336,12 +369,18 @@ val ss = s.slice(0, j+ 1) val nangao = ders_simp(r, ss.toList) val easy = bsimp(bders(ss.toList, r)) - if(nangao != easy|| j == 2){ + if(true){ println(j) if(j == 3) println("not equal") + println("string") println(ss) + println("original regex") println(r) + println("regex after ders simp") println(nangao) + println("regex after ders") + println(bders(ss.toList, r)) + println("regex after ders and then a single simp") println(easy) } } @@ -349,7 +388,8 @@ } def main(args: Array[String]) { //check_all() - correctness_proof_convenient_path + enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet + //correctness_proof_convenient_path() } } diff -r 7a349fe58bf4 -r 622ddbb1223a lex_blex_Frankensteined.scala --- a/lex_blex_Frankensteined.scala Fri Mar 15 12:27:12 2019 +0000 +++ b/lex_blex_Frankensteined.scala Sat Mar 16 14:14:42 2019 +0000 @@ -88,10 +88,10 @@ } internalise(("a" | "ab") ~ ("b" | "")) -/* + def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { case (ONE, bs) => (Empty, bs) - case (PRED(f), C(c)::bs) => (Chr(c), bs) + case (CHAR(f), C(c)::bs) => (Chr(c), bs) case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp case (ALTS(rs), bs) => bs match { case Z::bs1 => { @@ -125,7 +125,7 @@ case (v, Nil) => v case _ => throw new Exception("Not decodable") } -*/ + //erase function: extracts the regx from Aregex def erase(r:ARexp): Rexp = r match{ @@ -370,54 +370,8 @@ case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2) case r1 :: rs2 => r1 :: rflats(rs2) } - //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long] - //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long] var flats_time = 0L var dist_time = 0L - /* - def bsimp(r: ARexp, depth: Int): ARexp = - { - r match { - case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match { - case (AZERO, _) => AZERO - case (_, AZERO) => AZERO - case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) - case (r1s, r2s) => ASEQ(bs1, r1s, r2s) - } - case AALTS(bs1, rs) => { - depth match { - case 0 => { - flats(distinctBy(rs, erase)) match { - case Nil => AZERO - case s :: Nil => fuse(bs1, s) - case rs => AALTS(bs1, rs) - } - } - case n => { - val rs_simp = rs.map(bsimp(_, n - 1)) - val time2 = System.nanoTime() - val flat_res = flats(rs_simp) - val time3 = System.nanoTime() - val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 - //flats_time += time3 - time2 - //dist_time += time4 - time3 - //distinctBy(flats(rs.map(bsimp)), erase) match { - dist_res match { - case Nil => AZERO - case s :: Nil => fuse(bs1, s) - case rs => AALTS(bs1, rs) - } - } - } - } - case r => r - } - } - */ - //----------------------------------------------------------------------------This bsimp is the original slow one def bsimp(r: ARexp): ARexp = r match { case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { @@ -428,49 +382,40 @@ } case AALTS(bs1, rs) => { val rs_simp = rs.map(bsimp) - val time2 = System.nanoTime() val flat_res = flats(rs_simp) - val time3 = System.nanoTime() val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 dist_res match { case Nil => AZERO case s :: Nil => fuse(bs1, s) case rs => AALTS(bs1, rs) } } - case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) + //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) case r => r } - - def bsimp_weakened(r: ARexp): ARexp = r match { - case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match { + def super_bsimp(r: ARexp): ARexp = r match { + case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match { case (AZERO, _) => AZERO case (_, AZERO) => AZERO case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) + case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } case AALTS(bs1, rs) => { - val rs_simp = rs.map(bsimp_weakened) - val time2 = System.nanoTime() + val rs_simp = rs.map(super_bsimp) val flat_res = flats(rs_simp) - val time3 = System.nanoTime() val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 dist_res match { case Nil => AZERO case s :: Nil => fuse(bs1, s) case rs => AALTS(bs1, rs) } } - case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r)) + //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) case r => r } + def simp_weakened(r: Rexp): Rexp = r match { case SEQ(r1, r2) => (simp_weakened(r1), r2) match { case (ZERO, _) => ZERO @@ -532,66 +477,32 @@ case Nil => { if (bnullable(r)) { //println(asize(r)) - val time4 = System.nanoTime() - val bits = mkepsBC(r) - val time5 = System.nanoTime() - mkepsBC_time = time5 - time4 - bits + mkepsBC(r) } else throw new Exception("Not matched") } case c::cs => { - val time1 = System.nanoTime() val der_res = bder(c,r) - val time2 = System.nanoTime() val simp_res = bsimp(der_res) - val time3 = System.nanoTime() - bder_time = bder_time + time2 - time1 - bsimp_time = bsimp_time + time3 - time2 blex_simp(simp_res, cs) } } + def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match { + case Nil => { + if (bnullable(r)) { + mkepsBC(r) + } + else throw new Exception("Not matched") + } + case c::cs => { + super_blex_simp(super_bsimp(bder(c,r)), cs) + } + } def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{ case Nil => r case c::cs => blex_real_simp(bsimp(bder(c, r)), cs) } - //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true - /* - def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { - case Nil => { - if (bnullable(r)) { - //println(asize(r)) - mkepsBC(r) - } - else throw new Exception("Not matched") - } - case c::cs => { - val der_res = bder(c,r) - val simp_res = bsimp(der_res) - //val simp_res2 = bsimp(simp_res) - //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) - blex_simp(simp_res, cs) - } - } - */ - /* - def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => { - if (nullable(r)) { - mkeps(r) - } - else throw new Exception("Not matched") - } - case c::cs => { - val start = System.nanoTime() - val (r_simp, f_simp) = simp(der(c, r)) - val end = System.nanoTime() - println((end - start)/1.0e9) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } - } - */ //size: of a Aregx for testing purposes def size(r: Rexp) : Int = r match { @@ -608,27 +519,11 @@ // decoding does not work yet def blexing_simp(r: Rexp, s: String) = { - //flats_time.clear() - //dist_time.clear() - flats_time = 0L - dist_time = 0L - bder_time = 0L - bsimp_time = 0L - mkepsBC_time = 0L - val start = System.nanoTime() val bit_code = blex_simp(internalise(r), s.toList) - val end = System.nanoTime() - println("total time: "+ (end - start)/1.0e9) - println("spent on flats: " + (flats_time/(1.0e9))) - println("spent on distinctBy: " + (dist_time/(1.0e9))) - println("spent on bder: "+ bder_time/1.0e9) - println("spent on bsimp: " + bsimp_time/1.0e9) - println("spent on mkepsBC: " + mkepsBC_time/1.0e9) - //println(s"The length of the string ${s.length}; length of bit sequence:") - //println((bit_code.length)) - //println(final_derivative) - //bit_code - //decode(r, bit_code) + decode(r, bit_code) + } + def super_blexing_simp(r: Rexp, s: String) = { + decode(r, super_blex_simp(internalise(r), s.toList)) } @@ -830,7 +725,7 @@ //println(s"Testing ${r}") for (s <- ss.par) yield { val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None) - val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None) + val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None) if (res1 != res2) println(s"Disagree on ${r} and ${s}") if (res1 != res2) println(s" ${res1} != ${res2}") if (res1 != res2) Some((r, s)) else None @@ -839,7 +734,7 @@ - //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet + /* def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = { for (s <- ss){