diff -r c334f0b3ef52 -r cc54ce075db5 thys2/blexer2.sc --- a/thys2/blexer2.sc Tue May 31 17:36:45 2022 +0100 +++ b/thys2/blexer2.sc Fri Jun 03 16:45:30 2022 +0100 @@ -522,6 +522,7 @@ case (_, AZERO) => AZERO case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil + //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } case AALTS(bs1, rs) => { @@ -535,10 +536,78 @@ case rs => AALTS(bs1, rs) } } + case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs) case r => r } } +def distinctWith(rs: List[ARexp], + f: (ARexp, Set[Rexp]) => ARexp, + acc: Set[Rexp] = Set()) : List[ARexp] = + rs match{ + case Nil => Nil + case r :: rs => + if(acc(erase(r))) + distinctWith(rs, f, acc) + else { + val pruned_r = f(r, acc) + pruned_r :: distinctWith(rs, f, strongBreakIntoTerms(erase(pruned_r)) ++: acc) + } + } + +//r = r' ~ tail => returns r' +def removeSeqTail(r: Rexp, tail: Rexp) : Rexp = r match { + case SEQ(r1, r2) => + if(r2 == tail) + r1 + else + ZERO + case r => ZERO +} + +def prune7(r: ARexp, acc: Set[Rexp]) : ARexp = r match{ + case AALTS(bs, rs) => rs.map(r => prune7(r, acc)).filter(_ != ZERO) match + { + case Nil => AZERO + case r::Nil => fuse(bs, r) + case rs1 => AALTS(bs, rs1) + } + case ASEQ(bs, r1, r2) => prune7(r1, acc.map(r => removeSeqTail(r, erase(r2)))) match { + case AZERO => AZERO + case r1p if(isOne(erase(r1p))) => fuse(bs ++ mkepsBC(r1p), r2) + case r1p => ASEQ(bs, r1p, r2) + } + case r => if(acc(erase(r))) AZERO else r +} + +def strongBsimp7(r: ARexp): ARexp = +{ + r match { + case ASEQ(bs1, r1, r2) => (strongBsimp7(r1), strongBsimp7(r2)) match { + case (AZERO, _) => AZERO + case (_, AZERO) => AZERO + case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) + case (r1s, AONE(bs2)) => fuse(bs1, r1s) //asserted bs2 == Nil + //case (r1s, r2s) if(isOne(erase(r1s))) => fuse(bs1 ++ mkepsBC(r1s), r2s) + case (r1s, r2s) => ASEQ(bs1, r1s, r2s) + } + case AALTS(bs1, rs) => { + // println("alts case") + val rs_simp = rs.map(strongBsimp7(_)) + val flat_res = flats(rs_simp) + var dist_res = distinctWith(flat_res, prune7)//distinctBy(flat_res, erase) + (dist_res) match { + case Nil => AZERO + case s :: Nil => fuse(bs1, s) + case rs => AALTS(bs1, rs) + } + } + case ASTAR(bs, r0) if(atMostEmpty(erase(r0))) => AONE(bs) + case r => r + } +} + + def bders (s: List[Char], r: ARexp) : ARexp = s match { case Nil => r case c::s => bders(s, bder(c, r)) @@ -717,9 +786,28 @@ } } +def atMostEmpty(r: Rexp) : Boolean = r match { + case ZERO => true + case ONE => true + case STAR(r) => atMostEmpty(r) + case SEQ(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2) + case ALTS(r1, r2) => atMostEmpty(r1) && atMostEmpty(r2) + case CHAR(_) => false +} + +def isOne(r: Rexp) : Boolean = r match { + case ONE => true + case SEQ(r1, r2) => isOne(r1) && isOne(r2) + case ALTS(r1, r2) => (isOne(r1) || isOne(r2)) && (atMostEmpty(r1) && atMostEmpty(r2))//rs.forall(atMostEmpty) && rs.exists(isOne) + case STAR(r0) => atMostEmpty(r0) + case CHAR(c) => false + case ZERO => false + +} + def strongBreakIntoTerms(r: Rexp): List[Rexp] = r match { - case SEQ(r1, r2) => if(nullable(r1)) - strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: + case SEQ(r1, r2) => if(isOne(r1)) + //strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) ::: strongBreakIntoTerms(r2) else strongBreakIntoTerms(r1).map(r11 => SEQ(r11, r2)) @@ -734,34 +822,14 @@ } def ABIncludedByC[A, B, C](a: A, b: B, c: C, f: (A, B) => C, subseteqPred: (C, C) => Boolean) : Boolean = { - subseteqPred(f(a, b), c) } -def rexpListInclusion(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = { - // println("r+ctx---------") - // rsprint(rs1) - // println("acc---------") - // rsprint(rs2) - +def rs1_subseteq_rs2(rs1: Set[Rexp], rs2: Set[Rexp]) : Boolean = { val res = rs1.forall(rs2.contains) - // println(res) - // println("end------------------") res } def prune6(acc: Set[Rexp], r: ARexp, ctx: List[Rexp]) : ARexp = { - // println("pakc--------r") - // println(shortRexpOutput(erase(r))) - // println("ctx---------") - // rsprint(ctx) - // println("pakc-------acc") - // rsprint(acc) - // println("r+ctx broken down---------") - // rsprint(breakIntoTerms(L(erase(r), ctx)).map(oneSimp)) - - // rprint(L(erase(r), ctx)) - //breakIntoTerms(L(erase(r), ctx)).map(oneSimp).forall(acc.contains) - if (ABIncludedByC(r, ctx, acc, attachCtx, rexpListInclusion)) {//acc.flatMap(breakIntoTerms - //println("included in acc!!!") + if (ABIncludedByC(r, ctx, acc, attachCtx, rs1_subseteq_rs2)) {//acc.flatMap(breakIntoTerms AZERO } else{ @@ -776,23 +844,12 @@ ASEQ(bs, r1p, r2) } case AALTS(bs, rs0) => - // println("before pruning") - // println(s"ctx is ") - // ctx.foreach(r => println(shortRexpOutput(r))) - // println(s"rs0 is ") - // rs0.foreach(r => println(shortRexpOutput(erase(r)))) - // println(s"acc is ") - // acc.foreach(r => println(shortRexpOutput(r))) rs0.map(r => prune6(acc, r, ctx)).filter(_ != AZERO) match { case Nil => - // println("after pruning Nil") AZERO case r :: Nil => - // println("after pruning singleton") - // println(shortRexpOutput(erase(r))) r case rs0p => - // println("after pruning non-singleton") AALTS(bs, rs0p) } case r => r @@ -913,7 +970,6 @@ def strong_blex_simp5(r: ARexp, s: List[Char]) : Bits = s match { case Nil => { if (bnullable(r)) { - //println(asize(r)) val bits = mkepsBC(r) bits } @@ -943,6 +999,10 @@ case Nil => r case c::s => bdersStrong6(s, strongBsimp6(bder(c, r))) } + def bdersStrong7(s: List[Char], r: ARexp) : ARexp = s match { + case Nil => r + case c::s => bdersStrong7(s, strongBsimp7(bder(c, r))) + } def bdersSimp(s: String, r: Rexp) : ARexp = bders_simp(s.toList, internalise(r)) def bdersStrong(s: List[Char], r: ARexp) : ARexp = s match { @@ -1189,82 +1249,38 @@ def generator_test() { - // test(rexp(7), 1000) { (r: Rexp) => - // val ss = stringsFromRexp(r) - // val boolList = ss.map(s => { - // val simpVal = simpBlexer(r, s) - // val strongVal = strongBlexer(r, s) - // // println(simpVal) - // // println(strongVal) - // (simpVal == strongVal) && (simpVal != None) - // }) - // !boolList.exists(b => b == false) - // } - // test(single(STAR(ALTS(STAR(CHAR('c')),ALTS(CHAR('c'),ZERO)))), 100000) { (r: Rexp) => - // val ss = stringsFromRexp(r) - // val boolList = ss.map(s => { - // val bdStrong = bdersStrong(s.toList, internalise(r)) - // val bdStrong5 = bdersStrong5(s.toList, internalise(r)) - // // println(shortRexpOutput(r)) - // // println(s) - // // println(strongVal) - // // println(strongVal5) - // // (strongVal == strongVal5) - - // if(asize(bdStrong5) > asize(bdStrong)){ - // println(s) - // println(shortRexpOutput(erase(bdStrong5))) - // println(shortRexpOutput(erase(bdStrong))) - // } - // asize(bdStrong5) <= asize(bdStrong) - // }) - // !boolList.exists(b => b == false) - // } - //*** example where bdStrong5 has a smaller size than bdStrong - // test(single(STAR(SEQ(ALTS(SEQ(STAR(CHAR('a')),ALTS(ALTS(ONE,ZERO),SEQ(ONE,ONE))),CHAR('a')),ONE))), 1) { (r: Rexp) => - //*** depth 5 example bdStrong5 larger size than bdStrong - // test(single(STAR(SEQ(SEQ(ALTS(CHAR('b'),STAR(CHAR('b'))),CHAR('b')),(ALTS(STAR(CHAR('c')), ONE))))), 1) {(r: Rexp) => - - - - //sanity check from Christian's request - // val r = ("a" | "ab") ~ ("bc" | "c") - // val a = internalise(r) - // val aval = blexing_simp(r, "abc") - // println(aval) - - //sample counterexample:(depth 7) - //STAR(SEQ(ALTS(STAR(STAR(STAR(STAR(CHAR(c))))),ALTS(CHAR(c),CHAR(b))),ALTS(ZERO,SEQ(ALTS(ALTS(STAR(CHAR(c)),SEQ(CHAR(b),CHAR(a))),CHAR(c)),STAR(ALTS(ALTS(ONE,CHAR(a)),STAR(CHAR(c)))))))) - //(depth5) - //STAR(SEQ(ALTS(ALTS(STAR(CHAR(b)),SEQ(ONE,CHAR(b))),SEQ(STAR(CHAR(a)),CHAR(b))),ALTS(ZERO,ALTS(STAR(CHAR(b)),STAR(CHAR(a)))))) - test(rexp(8), 10000) { (r: Rexp) => + test(rexp(4), 1000000) { (r: Rexp) => // ALTS(SEQ(SEQ(ONE,CHAR('a')),STAR(CHAR('a'))),SEQ(ALTS(CHAR('c'),ONE),STAR(ZERO))))))), 1) { (r: Rexp) => - val ss = stringsFromRexp(r) + val ss = Set("b")//stringsFromRexp(r) val boolList = ss.filter(s => s != "").map(s => { //val bdStrong = bdersStrong(s.toList, internalise(r)) - val bdStrong6 = bdersStrong6(s.toList, internalise(r)) - val bdStrong6Set = breakIntoTerms(erase(bdStrong6)) - val pdersSet = pderUNIV(r).flatMap(r => breakIntoTerms(r)) - // println(s) - // println(bdStrong6Set.size, pdersSet.size) - bdStrong6Set.size <= pdersSet.size + val bdStrong6 = bdersStrong7(s.toList, internalise(r)) + val bdStrong6Set = strongBreakIntoTerms(erase(bdStrong6)) + val pdersSet = pderUNIV(r)//.flatMap(r => strongBreakIntoTerms(r)) + val pdersSetBroken = pdersSet.flatMap(r => strongBreakIntoTerms(r)) + bdStrong6Set.size <= pdersSet.size || bdStrong6Set.size <= pdersSetBroken.size }) - // println(boolList) + //println(boolList) //!boolList.exists(b => b == false) !boolList.exists(b => b == false) } - - } // small() -generator_test() - +// generator_test() + + + //STAR(STAR(STAR(SEQ(SEQ(ALTS(STAR(ALTS(ONE,CHAR('c'))), + // CHAR('c')),ALTS(CHAR('a'),ALTS(STAR(CHAR('b')),ALTS(CHAR('a'),ZERO)))), + // CHAR('c')))))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE))) +//counterexample1: STAR(SEQ(ALTS(STAR(ZERO),ALTS(CHAR(a),CHAR(b))),SEQ(ONE,ALTS(CHAR(a),CHAR(b))))) +//counterexample2: SEQ(ALTS(SEQ(CHAR(a),STAR(ONE)),STAR(ONE)),ALTS(CHAR(a),SEQ(ALTS(CHAR(c),CHAR(a)),CHAR(b)))) def counterexample_check() { - val r = SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE)))//STAR(SEQ(ALTS(STAR(CHAR('c')),CHAR('c')),SEQ(ALTS(CHAR('c'),ONE),ONE))) - val s = "ccc" - val bdStrong5 = bdersStrong6(s.toList, internalise(r)) + val r = SEQ(SEQ(STAR(ALTS(CHAR('a'),CHAR('b'))), + ALTS(ALTS(CHAR('c'),CHAR('b')),STAR(ONE))),STAR(CHAR('b'))) + val s = "b" + val bdStrong5 = bdersStrong7(s.toList, internalise(r)) val bdStrong5Set = breakIntoTerms(erase(bdStrong5)) - val pdersSet = pderUNIV(r)//.map(oneSimp).flatMap(r => breakIntoTerms(r)) + val pdersSet = pderUNIV(r)//.map(r => erase(bsimp(internalise(r))))//.flatMap(r => strongBreakIntoTerms(r))//.map(oneSimp).flatMap(r => breakIntoTerms(r)) println("original regex ") rprint(r) println("after strong bsimp") @@ -1273,8 +1289,10 @@ rsprint(bdStrong5Set) println("after pderUNIV") rsprint(pdersSet.toList) + } -// counterexample_check() +counterexample_check() + def linform_test() { val r = STAR(SEQ(STAR(CHAR('c')),ONE))//SEQ(STAR(CHAR('c')),STAR(SEQ(STAR(CHAR('c')),ONE))) // val r_linforms = lf(r)