diff -r 7a349fe58bf4 -r 622ddbb1223a lex_blex_Frankensteined.scala --- a/lex_blex_Frankensteined.scala Fri Mar 15 12:27:12 2019 +0000 +++ b/lex_blex_Frankensteined.scala Sat Mar 16 14:14:42 2019 +0000 @@ -88,10 +88,10 @@ } internalise(("a" | "ab") ~ ("b" | "")) -/* + def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match { case (ONE, bs) => (Empty, bs) - case (PRED(f), C(c)::bs) => (Chr(c), bs) + case (CHAR(f), C(c)::bs) => (Chr(c), bs) case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp case (ALTS(rs), bs) => bs match { case Z::bs1 => { @@ -125,7 +125,7 @@ case (v, Nil) => v case _ => throw new Exception("Not decodable") } -*/ + //erase function: extracts the regx from Aregex def erase(r:ARexp): Rexp = r match{ @@ -370,54 +370,8 @@ case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2) case r1 :: rs2 => r1 :: rflats(rs2) } - //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long] - //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long] var flats_time = 0L var dist_time = 0L - /* - def bsimp(r: ARexp, depth: Int): ARexp = - { - r match { - case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match { - case (AZERO, _) => AZERO - case (_, AZERO) => AZERO - case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) - case (r1s, r2s) => ASEQ(bs1, r1s, r2s) - } - case AALTS(bs1, rs) => { - depth match { - case 0 => { - flats(distinctBy(rs, erase)) match { - case Nil => AZERO - case s :: Nil => fuse(bs1, s) - case rs => AALTS(bs1, rs) - } - } - case n => { - val rs_simp = rs.map(bsimp(_, n - 1)) - val time2 = System.nanoTime() - val flat_res = flats(rs_simp) - val time3 = System.nanoTime() - val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 - //flats_time += time3 - time2 - //dist_time += time4 - time3 - //distinctBy(flats(rs.map(bsimp)), erase) match { - dist_res match { - case Nil => AZERO - case s :: Nil => fuse(bs1, s) - case rs => AALTS(bs1, rs) - } - } - } - } - case r => r - } - } - */ - //----------------------------------------------------------------------------This bsimp is the original slow one def bsimp(r: ARexp): ARexp = r match { case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match { @@ -428,49 +382,40 @@ } case AALTS(bs1, rs) => { val rs_simp = rs.map(bsimp) - val time2 = System.nanoTime() val flat_res = flats(rs_simp) - val time3 = System.nanoTime() val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 dist_res match { case Nil => AZERO case s :: Nil => fuse(bs1, s) case rs => AALTS(bs1, rs) } } - case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) + //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) case r => r } - - def bsimp_weakened(r: ARexp): ARexp = r match { - case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match { + def super_bsimp(r: ARexp): ARexp = r match { + case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match { case (AZERO, _) => AZERO case (_, AZERO) => AZERO case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) + case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) case (r1s, r2s) => ASEQ(bs1, r1s, r2s) } case AALTS(bs1, rs) => { - val rs_simp = rs.map(bsimp_weakened) - val time2 = System.nanoTime() + val rs_simp = rs.map(super_bsimp) val flat_res = flats(rs_simp) - val time3 = System.nanoTime() val dist_res = distinctBy(flat_res, erase) - val time4 = System.nanoTime() - flats_time = flats_time + time3 - time2 - dist_time = dist_time + time4 - time3 dist_res match { case Nil => AZERO case s :: Nil => fuse(bs1, s) case rs => AALTS(bs1, rs) } } - case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r)) + //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) case r => r } + def simp_weakened(r: Rexp): Rexp = r match { case SEQ(r1, r2) => (simp_weakened(r1), r2) match { case (ZERO, _) => ZERO @@ -532,66 +477,32 @@ case Nil => { if (bnullable(r)) { //println(asize(r)) - val time4 = System.nanoTime() - val bits = mkepsBC(r) - val time5 = System.nanoTime() - mkepsBC_time = time5 - time4 - bits + mkepsBC(r) } else throw new Exception("Not matched") } case c::cs => { - val time1 = System.nanoTime() val der_res = bder(c,r) - val time2 = System.nanoTime() val simp_res = bsimp(der_res) - val time3 = System.nanoTime() - bder_time = bder_time + time2 - time1 - bsimp_time = bsimp_time + time3 - time2 blex_simp(simp_res, cs) } } + def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match { + case Nil => { + if (bnullable(r)) { + mkepsBC(r) + } + else throw new Exception("Not matched") + } + case c::cs => { + super_blex_simp(super_bsimp(bder(c,r)), cs) + } + } def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{ case Nil => r case c::cs => blex_real_simp(bsimp(bder(c, r)), cs) } - //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true - /* - def blex_simp(r: ARexp, s: List[Char]) : Bits = s match { - case Nil => { - if (bnullable(r)) { - //println(asize(r)) - mkepsBC(r) - } - else throw new Exception("Not matched") - } - case c::cs => { - val der_res = bder(c,r) - val simp_res = bsimp(der_res) - //val simp_res2 = bsimp(simp_res) - //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) - blex_simp(simp_res, cs) - } - } - */ - /* - def lex_simp(r: Rexp, s: List[Char]) : Val = s match { - case Nil => { - if (nullable(r)) { - mkeps(r) - } - else throw new Exception("Not matched") - } - case c::cs => { - val start = System.nanoTime() - val (r_simp, f_simp) = simp(der(c, r)) - val end = System.nanoTime() - println((end - start)/1.0e9) - inj(r, c, f_simp(lex_simp(r_simp, cs))) - } - } - */ //size: of a Aregx for testing purposes def size(r: Rexp) : Int = r match { @@ -608,27 +519,11 @@ // decoding does not work yet def blexing_simp(r: Rexp, s: String) = { - //flats_time.clear() - //dist_time.clear() - flats_time = 0L - dist_time = 0L - bder_time = 0L - bsimp_time = 0L - mkepsBC_time = 0L - val start = System.nanoTime() val bit_code = blex_simp(internalise(r), s.toList) - val end = System.nanoTime() - println("total time: "+ (end - start)/1.0e9) - println("spent on flats: " + (flats_time/(1.0e9))) - println("spent on distinctBy: " + (dist_time/(1.0e9))) - println("spent on bder: "+ bder_time/1.0e9) - println("spent on bsimp: " + bsimp_time/1.0e9) - println("spent on mkepsBC: " + mkepsBC_time/1.0e9) - //println(s"The length of the string ${s.length}; length of bit sequence:") - //println((bit_code.length)) - //println(final_derivative) - //bit_code - //decode(r, bit_code) + decode(r, bit_code) + } + def super_blexing_simp(r: Rexp, s: String) = { + decode(r, super_blex_simp(internalise(r), s.toList)) } @@ -830,7 +725,7 @@ //println(s"Testing ${r}") for (s <- ss.par) yield { val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None) - val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None) + val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None) if (res1 != res2) println(s"Disagree on ${r} and ${s}") if (res1 != res2) println(s" ${res1} != ${res2}") if (res1 != res2) Some((r, s)) else None @@ -839,7 +734,7 @@ - //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet + /* def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = { for (s <- ss){