# HG changeset patch # User Chengsong # Date 1642892248 0 # Node ID 5bffeacdf17e89811c35ba464372e5fee100d3f5 # Parent 4b22587fb667a85fe2b8140921ae3a2559708018 preserves! diff -r 4b22587fb667 -r 5bffeacdf17e thys2/zre8.sc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/thys2/zre8.sc Sat Jan 22 22:57:28 2022 +0000 @@ -0,0 +1,542 @@ +// package zre +//Zre5: eliminated mems table + + + +import scala.collection.mutable.{Map => MMap} +import scala.collection.mutable.{ArrayBuffer => MList} +//import pprint._ + +import scala.util.Try +import pprint._ + + +abstract class Val +case object Empty extends Val +case class Chr(c: Char) extends Val +case class Sequ(v1: Val, v2: Val) extends Val +case class Left(v: Val) extends Val +case class Right(v: Val) extends Val +case class Stars(vs: List[Val]) extends Val +case object DummyFilling extends Val + + +// abstract class Rexp { +// def equals(other: Rexp) : Boolean = this.eq(other) +// } +abstract class Rexp +case object ZERO extends Rexp // matches nothing +case object ONE extends Rexp // matches an empty string +case class CHAR(c: Char) extends Rexp // matches a character c +case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class AL1(r1: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp +case object RTOP extends Rexp + + +//Seq a b --> Seq Seqa Seqb +//Seq a b --> Sequ chra chrb +//ALT r1 r2 --> mALT +// AltC L AltC R +var cyclicPreventionList : Set[Int]= Set() +abstract class Ctx(var starIters: Int = 0){ + //starIters = 0 +} +case object RootC extends Ctx +case class SeqC( mForMyself: Mem, processedSibling: List[Val], unpSibling: List[Rexp]) extends Ctx +case class AltC( mForMyself: Mem, wrapper: Val => Val) extends Ctx +case class StarC(mForMyself: Mem, vs: List[Val], inside: Rexp) extends Ctx + +case class Mem(var parents: MList[Ctx], var result : MList[(Int, Val)]) + +//AltC(Mem(RootC::Nil, Map())) + + + +type Zipper = (Val, Mem) + +var mems : MMap[(Int, Rexp), Mem] = MMap() + //start pos, original regex --> result entry + + +var pos : Int = 0 + + + +//input .................. +// ^ ^ +// p q +// r + +//parse r[p...q] --> v + +//(a+aa)* +//aaa +//[R(Sequ(a, a)), vs] +//[L(a), L(a), vs] +def check_before_down(c: Ctx, r: Rexp, starIters: Int = 0) : List[Zipper] = { + c.starIters = starIters + mems.get((pos, r)) match { + case Some(m) => + // c match { + // case StarC(m, vs, inside) => vs.length + // } + val idx = m.parents.lastIndexWhere(c0 => c0.starIters < c.starIters) + if(m.parents.size == 2){ + println("third parent added") + println(simpleCtxDisplay(c)) + println("the other parents") + m.parents.foreach(c00 => println(c00.starIters)) + println(idx + 1) + println("c's star iters: "+ c.starIters) + println(s"the others' star iters: ${m.parents(0).starIters}") + } + m.parents.insert(idx + 1, c) + //m.parents = m.parents:::List(c) + m.result.find(endPos_value => endPos_value._1 == pos) match { + // case Some((i, v)) => + // original_up(v, c, starIters) + case None => + List() + } + case None => + val m = Mem(MList(c), MList.empty[(Int, Val)]) + mems = mems + ((pos, r) -> m) + original_down(r, m, starIters) + } + // val m = Mem(c::Nil, MList.empty[(Int, Val)]) + // original_down(r, m, d) +} + +//mems pstart r --> m parents [(pend, vres), ...] +//aaa +//012 +//seq a a +//0 a~a --> m ... [(2, Sequ a a)] + // c match { + // case StarC(mst, vst, rst) => print(s"StarC $vst\t") + // case SeqC(mse, pr, unp) => print(s"SeqC $unp\t") + // case AltC(mal, w) => print(s"AltC ${w(Empty)}\t") + // case RootC => print("Root") + // } +def reorderCtx(cs: List[Ctx]): List[Ctx] = { + Nil +} + +def mem_up(vres: Val, m: Mem, starIters : Int = 0) : List[Zipper] = { + m.result += (pos -> vres) + //m.parents = m.parents.reverse + + // if(m.parents.size > 1){//println() + // println() + // println("each of the contexts") + // m.parents.reverse.foreach (c => + // println(simpleCtxDisplay(c)) + // ) + // println("after distinctCtx") + // distinctCtx(m.parents.reverse).foreach(c => + // println(simpleCtxDisplay(c)) + // ) + // //println(s"vs is $vss") + + // } + //.distinctBy(zipBackToRegex(_)) + //TODO: too many arraybuffer->list conversions + (m.parents).distinctBy(zipBackToRegex(_)).flatMap((c: Ctx) => + original_up(vres, c, starIters) + ).toList + // m.parents.reverse.flatMap((c: Ctx) => + // original_up(vres, c, rec_depth) + // ) + // original_up(vres, m.parents.last, rec_depth) +} + +def original_down(r: Rexp, m: Mem, starIters: Int = 0) : List[Zipper] = (r, m) match { + case (CHAR(b), m) => { + if (input(pos) == b) { + List((Chr(b), m)) + } + else + Nil + } + case (ONE, m) => Nil//mem_up(Empty, m, starIters) + case (SEQ(r1, r2), m) => + // if(nullable(r1)){ + // val mprime = Mem(AltC(m, x => x )::Nil, MList.empty[(Int, Val)]) + // check_before_down(SeqC(mprime, Nil, List(r2)), r1, starIters) ::: + // check_before_down(SeqC(mprime, mkeps(r1)::Nil, Nil), r2, starIters) + // } + // else + check_before_down(SeqC(m, Nil, List(r2)), r1, starIters) + case (ALT(r1, r2), m) => + check_before_down(AltC(m, Left(_)), r1, starIters) ::: + check_before_down(AltC(m, Right(_)), r2, starIters) + case (STAR(r0), m) => + check_before_down(StarC(m, Nil, r0), r0, starIters) ::: + mem_up(Stars(Nil), m, starIters) + case (_, _) => throw new Exception("original down unexpected r or m") +} + +def original_up(v: Val, c: Ctx, starIters: Int = 0) : List[Zipper] = +{ + +(v, c) match { + case (v, SeqC(m, v1::Nil, Nil)) => + mem_up(Sequ(v1, v), m, starIters) + case (v, SeqC(m, vs, u1::Nil)) => + check_before_down(SeqC(m, v::vs, Nil), u1, starIters) + case (v, AltC(m, wrap)) => m.result.find(tup2 => tup2._1 == pos) match { + case Some( (i, vPrime) ) => + m.result += (i -> wrap(v)) + Nil + case None => + mem_up(wrap(v), m, starIters) + } //mem_up(AL1(v), par) + //case (v, StarC(m, vs, r0)) => throw new Exception("why not hit starC") + + case (v, RootC) => + Nil + case (v, StarC(m, vs, r0) ) => //mem_up(Stars(v::vs), m, starIters) //::: + check_before_down(StarC(m, v::vs, r0), r0, starIters + 1) ::: + mem_up(Stars((v::vs).reverse), m, starIters) + case (_, _) => throw new Exception("hit unexpected context") +} + +} + + +def derive(p: Int, z: Zipper) : List[Zipper] = { + pos = p + //println(s"z's actual size is ${actualZipperSize(z::Nil)}") + + z match { + case (v, m) => + + mem_up(v, m) + case _ => throw new Exception("error") + } +} +//let e' = Seq([]) in +// +def init_zipper(r: Rexp) : Zipper = { + val m_top = Mem(MList(RootC), MList.empty[(Int, Val)]) + val c_top = SeqC(m_top, Nil, r::Nil) + val m_r = Mem(MList(c_top), MList.empty[(Int, Val)]) + println(s"initial zipper is (Empty, $m_r)") + (Empty, m_r)//TODO: which val should we start with? Maybe Empty, maybe doesn't matter + // val dummyRexp = ONE + // val dummyMem = Mem() + +} + + +def plug_convert(v: Val, c: Ctx) : List[Val] = +{ + +c match { + case RootC => List(v) + //TODO: non-binary Seq requires ps.rev + case SeqC(m, ps::Nil, Nil) => + plug_mem(Sequ(ps, v), m) + + //TODO: un not nullable--partial values? + case SeqC(m, Nil, un::Nil) => + if(nullable(un)) + plug_mem(Sequ(v, mkeps(un)), m) + else + Nil + + //TODO: when multiple results stored in m, which one to choose? + case AltC(m, wrap) => + plug_mem(wrap(v), m) + case StarC(m, vs, r0) => plug_mem(Stars((v::vs).reverse), m) +} + +} + + +var cnt = 0; +def plug_mem(v: Val, m: Mem) : List[Val] = { + m.result += (pos -> v) + //TODO: eliminate arraybuffer->list conversion + m.parents.flatMap({c => + plug_convert(v, c) + } + ).toList +} + +def plug_all(zs: List[Zipper]) : List[Val] = { + zs.flatMap(z => plug_mem(z._1, z._2)) +} + + +def mkeps(r: Rexp) : Val = r match { + case ONE => Empty + case ALT(r1, r2) => + if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) + case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) + case _ => DummyFilling +} + +def nullable(r: Rexp) : Boolean = r match { + case ZERO => false + case ONE => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case _ => false +} + + +val tokList : List[Char] = "aab".toList +var input : List[Char] = tokList + + + + + + + +def lexRecurse(zs: List[Zipper], index: Int) : List[Zipper] = { + if(index < input.length ) + lexRecurse(zs.flatMap(z => derive(index, z) ), index + 1) + else + zs +} + +def lex(r: Rexp, s: String) : List[Zipper] = { + input = s.toList + + lexRecurse(init_zipper(r)::Nil, 0) +} + + + +implicit def charlist2rexp(s: List[Char]): Rexp = s match { + case Nil => ONE + case c::Nil => CHAR(c) + case c::cs => SEQ(CHAR(c), charlist2rexp(cs)) +} +implicit def string2Rexp(s: String) : Rexp = charlist2rexp(s.toList) + +implicit def RexpOps(r: Rexp) = new { + def | (s: Rexp) = ALT(r, s) + def ~ (s: Rexp) = SEQ(r, s) + def % = STAR(r) +} + +implicit def stringOps(s: String) = new { + def | (r: Rexp) = ALT(s, r) + def | (r: String) = ALT(s, r) + def ~ (r: Rexp) = SEQ(s, r) + def ~ (r: String) = SEQ(s, r) + def % = STAR(s) + +} + +//derive(0, init_zipper(re0)) + +// println(re1s.length) +// mems.foreach(a => println(a)) +// val re1sPlugged = plug_all(re1s) +// re1sPlugged.foreach(zipper => { +// println(zipper); +// println("delimit") +// }) + +// mems.clear() +// println(mems) +// println(re0) +// val re2s = lex(re0, "aab") +// val re2sPlugged = plug_all(re2s) +// re2sPlugged.foreach(v => { +// val Sequ(Empty, vp) = v +// println(vp) +// } +// ) +// val re0 = SEQ(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a'))), +// ALT(SEQ(CHAR('a'), CHAR('b')), SEQ(CHAR('b'), CHAR('c')) ) +// ) + +// val (rgraph, re0root) = makeGraphFromObject(re0) +// val asciir = GraphLayout.renderGraph(rgraph) +// println("printing out re0") +// println(asciir) +// val re1s = lex(re0, "aabc") + +def actualZipperSize(zs: List[Zipper]) : Int = zs match { + case Nil => 0 + case z::zs1 => countParents(z._2) + actualZipperSize(zs1) +} + +def countParents(m: Mem) : Int = { + m.parents.map(c => countGrandParents(c)).sum +} + +def countGrandParents(c: Ctx) : Int = { + c match { + case RootC => 1 + case SeqC(m, pr, unp) => countParents(m) + case AltC(m, w) => countParents(m) + case StarC(m, _, _) => countParents(m) + } +} +//(a+aa)* \a --> (1 + a)(a+aa)* --> (a+aa)* + (1+a)(a+aa)* + +//a(a+aa)* + 1(a+aa)* + (a+aa)* + +//a~(a + aa)* \a -> 1 ~ (a + aa)* +//va <-----> m --> SeqC(m1, pr, "a") --> AltC(m4, Right)--> StarC(m2, vs, "a" + "aa") --> SeqC(m) ---> Root +// ^ +// ---> AltC(m4, Left) +def zipBackToRegex(c: Ctx, r: Rexp = ONE) : Rexp = { + c match { + case RootC => r + case SeqC(m, pr, Nil) => zipBackToRegex(m.parents.head, r) + case SeqC(m, pr, unp::Nil) => zipBackToRegex(m.parents.head, SEQ(r, unp)) + case AltC(m, w) => zipBackToRegex(m.parents.head, r) + case StarC(m, vs, r0) => zipBackToRegex(m.parents.head, SEQ(r, STAR(r0))) + } +} + +def zipperSimp(z: Zipper) : Unit = z match { + case (v, m) => //m.parents = m.parents.distinctBy(c => zipBackToRegex(c)) +} + +def distinctCtx(cs: List[Ctx]) : List[Ctx] = cs.distinctBy(c => zipBackToRegex(c)) + + +def simpleCtxDisplay(c: Ctx, indent : Int = 0) : String = c match { + case SeqC(m, pr, unp) => "Sc[m:" ++ printMem(m, indent + 1) ++ + "pr:" ++ pr.map(v => shortValOutput(v)).mkString(", ") ++ " unp:" ++ unp.map(r2 => shortRexpOutput(r2)).mkString(", ") ++ "]" + case AltC(m, w) => + w(Empty) match { + case Left(_) => s"Ac(m:${printMem(m, indent + 1)}, Left(_))" + case Right(_) => s"Ac(m:${printMem(m, indent + 1)}, Right(_))" + case Empty => s"Ac(m:${printMem(m, indent + 1)}, id)" + } + case StarC(m, vs, r0) => s"StarC[m:" ++ printMem(m, indent + 1) ++ + "vs:" ++ vs.map(v => shortValOutput(v)).mkString(", ") ++ " r0: " ++ shortRexpOutput(r0) + case RootC => "Root" + //case AL1(r) => s"(+${shortRexpOutput(r)})" + //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")" + //case RTOP => "RTOP" + } + +def printMem(m: Mem, indent: Int = 0) : String = { + "M(par:" ++ + m.parents.map(c => simpleCtxDisplay(c, indent + 1)).mkString("(",",", ")") ++ + (", results:") ++ + (for(iRexp <- m.result) + yield iRexp match {case (p: Int, v: Val) => s"$p->${shortValOutput(v)}"} + ).mkString("(",",", ")") ++ + ")" +} + +def shortRexpOutput(r: Rexp) : String = r match { + case CHAR(c) => c.toString + case ONE => "1" + case ZERO => "0" + case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]" + case ALT(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")" + case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*" + //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")" + case RTOP => "RTOP" + } + +def shortValOutput(v: Val) : String = v match { + case Left(v) => "L(" ++ shortValOutput(v) ++ ")" + case Right(v) => "R(" ++ shortValOutput(v) ++ ")" + case Empty => "e" + case Sequ(v1, v2) => "[" ++ shortValOutput(v1) ++ "~" ++ shortValOutput(v2) ++ "]" + case Chr(a) => a.toString + case Stars(vs) => "Stars" ++ vs.map(shortValOutput(_)).mkString("[", ",", "]") + case _ => "???" +} + + +//def crystalizeZipper + +for(i <- 1 to 2) { + mems.clear() +println(s"there are $i number of a's") +val re1 = ("a" | "aa" | "ab").%//(("a" | "b") ~ "c" | ("b" | "e") ~ "c" ) ~ "f" +val re1Lexed = lex(re1, "a"*i) + +//drawZippers(re1Lexed) +println("size of actual zipper (including memoized contexts") +println(actualZipperSize(re1Lexed)) +//println(re1Lexed) +//re1Lexed.foreach(zipperSimp(_)) +//println(actualZipperSize(re1S)) +val re1resPlugged = plug_all(re1Lexed) +//println(actualZipperSize(re1Lexed)) + +println("value extracted") +re1resPlugged.foreach(v => { + val Sequ(Empty, vp) = v + println(vp) +} +) + + val mb = 1024*1024 +val runtime = Runtime.getRuntime +println("ALL RESULTS IN MB") +println("** Used Memory: " + (runtime.totalMemory - runtime.freeMemory) / mb) +println("** Free Memory: " + runtime.freeMemory / mb) +println("** Total Memory: " + runtime.totalMemory / mb) +println("** Max Memory: " + runtime.maxMemory / mb) + +} + +mems.clear() +val re2 = SEQ(ONE, "a") +val re2res = lex(re2, "a") +//lex(1~a, "a") --> lexRecurse((1v, m (SeqC(m (RootC, Nil), Nil, [1~a] ) ))) + + +println(re2res) + +val re2resPlugged = plug_all(re2res) +re2resPlugged.foreach(v => { + val Sequ(Empty, vp) = v + println(vp) +} +) + +// println("remaining regex") +// println(re1ss.flatMap(z => zipBackMem(z._2))) + + +// val re1ssPlugged = plug_all(re1ss) +// println("each of the values") +// re1ssPlugged.foreach(v => { +// //val Sequ(Empty, vp) = v +// //println(vp) +// println(v) +// } +// ) +// println(mems.size) +//println(mems) +//mems.map({case (ir, m) => if (ir._1 == 1 && ir._2 == CHAR('b')) println(printMem(m)) }) +// println("Mkeps + inj:") +// println( +// mems.get((0, re1)) match { +// case Some(m) => printMem(m) +// case None => "" +// } +// ) + +// println(re1sPlugged) +//drawZippers(re1s, plugOrNot = false) +// re1s.foreach{ +// re1 => +// { + +// drawZippers(derive(1, re1), plugOrNot = true) + +// } +// } + +