thys2/zre8.sc
author Chengsong
Mon, 10 Jul 2023 01:33:45 +0100 (18 months ago)
changeset 660 eddc4eaba7c4
parent 403 6291181fad07
permissions -rw-r--r--
addresses Gerog "N_r meaning and relation with backtracking?" comment
// package zre 
//Zre5: eliminated mems table



import scala.collection.mutable.{Map => MMap}
import scala.collection.mutable.{ArrayBuffer => MList}
//import pprint._

import scala.util.Try
import pprint._


abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case object DummyFilling extends Val


// abstract class Rexp {
//      def equals(other: Rexp) : Boolean = this.eq(other)
// }
abstract class Rexp
case object ZERO extends Rexp                    // matches nothing
case object ONE extends Rexp                     // matches an empty string
case class CHAR(c: Char) extends Rexp            // matches a character c
case class ALT(r1: Rexp, r2: Rexp) extends Rexp  // alternative
case class AL1(r1: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp  // sequence
case class STAR(r: Rexp) extends Rexp
case object RTOP extends Rexp


//Seq a b --> Seq Seqa Seqb
//Seq a b --> Sequ chra chrb
//ALT r1 r2 --> mALT
//         AltC L   AltC R
var cyclicPreventionList : Set[Int]= Set()
abstract class Ctx(var starIters: Int = 0){
    //starIters = 0
}
case object RootC extends Ctx
case class SeqC( mForMyself:  Mem, processedSibling: List[Val], unpSibling: List[Rexp]) extends Ctx 
case class AltC( mForMyself:  Mem, wrapper: Val => Val) extends Ctx
case class StarC(mForMyself:  Mem, vs: List[Val], inside: Rexp) extends Ctx

case class Mem(var parents: MList[Ctx], var result : MList[(Int, Val)])

//AltC(Mem(RootC::Nil, Map()))



type Zipper = (Val, Mem)

var mems : MMap[(Int, Rexp), Mem] = MMap()
        //start pos, original regex --> result entry


var pos : Int = 0

  //size: of a Aregx for testing purposes 
  def size(r: Rexp) : Int = r match {
    case ZERO => 1
    case ONE => 1
    case CHAR(_) => 1
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
    case ALT(r1, r2) => 1 + List(r1, r2).map(size).sum
    case STAR(r) => 1 + size(r)
  }



//input ..................
//          ^       ^
//          p       q
//          r

//parse r[p...q] --> v

//(a+aa)*
//aaa
//[R(Sequ(a, a)), vs]
//[L(a), L(a), vs]
def check_before_down(c: Ctx, r: Rexp, starIters: Int = 0) : List[Zipper] = {
    c.starIters = starIters
    mems.get((pos, r)) match {
        case Some(m) => 
            // c match {
            //     case StarC(m, vs, inside) => vs.length
            // }
            val idx = m.parents.lastIndexWhere(c0 => c0.starIters <= c.starIters)
            // if(m.parents.size == 1){
            //     println("third parent added")
            //     println(simpleCtxDisplay(c))
            //     println("the other parents")
            //     m.parents.foreach(c00 => println(c00.starIters))
            //     println(idx + 1)
            //     println("c's star iters: "+ c.starIters)
            //     println(s"the others' star iters: ${m.parents(0).starIters}")
            // }
            m.parents.insert(idx + 1, c)
            //m.parents = m.parents:::List(c)
            m.result.find(endPos_value => endPos_value._1 == pos) match {
                // case Some((i, v)) => 
                //   original_up(v, c, starIters)
                case None => 
                  List()
            }
        case None => 
            val m = Mem(MList(c), MList.empty[(Int, Val)])
            mems = mems + ((pos, r) -> m)
            original_down(r, m, starIters)
    }
    // val m = Mem(c::Nil, MList.empty[(Int, Val)])
    // original_down(r, m, d)
}

//mems  pstart r  --> m parents [(pend, vres), ...]
//aaa
//012
//seq a a 
//0 a~a --> m ... [(2, Sequ a a)]
        // c match {
        //     case StarC(mst, vst, rst) => print(s"StarC $vst\t")
        //     case SeqC(mse, pr, unp) => print(s"SeqC $unp\t")
        //     case AltC(mal, w) => print(s"AltC ${w(Empty)}\t")
        //     case RootC => print("Root")
        // }
def reorderCtx(cs: List[Ctx]): List[Ctx] = {
    Nil
}

def mem_up(vres: Val, m: Mem, starIters : Int = 0) : List[Zipper] = {
    m.result += (pos -> vres)
    //m.parents = m.parents.reverse
          
    // if(m.parents.size > 1){//println()
    //     println()  
    //     println("each of the contexts")
    //     m.parents.reverse.foreach (c =>
    //         println(simpleCtxDisplay(c))
    //     )
    //     println("after distinctCtx")
    //     distinctCtx(m.parents.reverse).foreach(c =>
    //         println(simpleCtxDisplay(c))
    //     )
    //     //println(s"vs is $vss")
    
    // }
    //.distinctBy(zipBackToRegex(_))
    //TODO: too many arraybuffer->list conversions
    (m.parents).distinctBy(zipBackToRegex(_)).flatMap((c: Ctx) =>
        original_up(vres, c, starIters)
    ).toList
    // m.parents.reverse.flatMap((c: Ctx) =>
    //     original_up(vres, c, rec_depth)
    // )
    // original_up(vres, m.parents.last, rec_depth)
}

def original_down(r: Rexp, m: Mem, starIters: Int = 0) : List[Zipper] = (r, m) match {
    case (CHAR(b), m) => {
        if (input(pos) == b) {
            List((Chr(b), m)) 
        }
        else 
            Nil
    }
    case (ONE, m) => Nil//mem_up(Empty, m, starIters)
    case (SEQ(r1, r2), m) =>  
        // if(nullable(r1)){
        //     val mprime = Mem(AltC(m, x => x )::Nil, MList.empty[(Int, Val)])
        //     check_before_down(SeqC(mprime, Nil, List(r2)), r1, starIters) :::
        //     check_before_down(SeqC(mprime, mkeps(r1)::Nil, Nil), r2, starIters)
        // }
        // else
            check_before_down(SeqC(m, Nil, List(r2)), r1, starIters)
    case (ALT(r1, r2), m) => 
        check_before_down(AltC(m, Left(_)), r1, starIters) ::: 
        check_before_down(AltC(m, Right(_)), r2, starIters)
    case (STAR(r0), m) =>
        check_before_down(StarC(m, Nil, r0), r0, starIters) :::
        mem_up(Stars(Nil), m, starIters)
    case (_, _) => throw new Exception("original down unexpected r or m")
}

def original_up(v: Val, c: Ctx, starIters: Int = 0) : List[Zipper] = 
{

(v, c) match {
    case (v, SeqC(m, v1::Nil, Nil)) => 
        mem_up(Sequ(v1, v), m, starIters)
    case (v, SeqC(m, vs, u1::Nil)) => 
        check_before_down(SeqC(m, v::vs, Nil), u1, starIters)
    case (v, AltC(m, wrap)) => m.result.find(tup2 => tup2._1 == pos) match {
        case Some( (i, vPrime)  ) => 
            m.result += (i -> wrap(v))
            Nil
        case None => 
            mem_up(wrap(v), m, starIters)
    } //mem_up(AL1(v), par)
    //case (v, StarC(m, vs, r0)) => throw new Exception("why not hit starC")

    case (v, RootC) => 
        Nil
    case (v, StarC(m, vs, r0) ) => //mem_up(Stars(v::vs), m, starIters) //::: 
        check_before_down(StarC(m, v::vs, r0), r0, starIters + 1) :::
        mem_up(Stars((v::vs).reverse), m, starIters)
    case (_, _) => throw new Exception("hit unexpected context")
}

}


def derive(p: Int, z: Zipper) : List[Zipper] = {
    pos = p
    //println(s"z's actual size is ${actualZipperSize(z::Nil)}")
    
    z match {
        case (v, m) => 
            
            mem_up(v, m)
        case _ => throw new Exception("error")
    }
}
//let e' = Seq([]) in 
//
def init_zipper(r: Rexp) : Zipper = {
    val m_top = Mem(MList(RootC), MList.empty[(Int, Val)])
    val c_top = SeqC(m_top, Nil, r::Nil)
    val m_r = Mem(MList(c_top), MList.empty[(Int, Val)])
    println(s"initial zipper is (Empty, $m_r)")
    (Empty, m_r)//TODO: which val should we start with? Maybe Empty, maybe doesn't matter
    // val dummyRexp = ONE
    // val dummyMem = Mem()

}


def plug_convert(v: Val, c: Ctx) : List[Val] = 
{

c match {
    case RootC => List(v)
    //TODO: non-binary Seq requires ps.rev
    case SeqC(m, ps::Nil, Nil) => 
        plug_mem(Sequ(ps, v), m)

    //TODO: un not nullable--partial values?
    case SeqC(m, Nil, un::Nil) => 
        if(nullable(un))
            plug_mem(Sequ(v, mkeps(un)), m)
        else
            Nil

    //TODO: when multiple results stored in m, which one to choose?
    case AltC(m, wrap) => 
        plug_mem(wrap(v), m)
    case StarC(m, vs, r0) => plug_mem(Stars((v::vs).reverse), m)
}

}


var cnt = 0;
def plug_mem(v: Val, m: Mem) : List[Val] = {
    m.result += (pos -> v)
    //TODO: eliminate arraybuffer->list conversion
    m.parents.flatMap({c =>
        plug_convert(v, c)
    }
    ).toList
}

def plug_all(zs: List[Zipper]) : List[Val] = {
    zs.flatMap(z => plug_mem(z._1, z._2))
}


def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case ALT(r1, r2) => 
    if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
  case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
  case _ => DummyFilling
}

def nullable(r: Rexp) : Boolean = r match {
  case ZERO => false
  case ONE => true
  case CHAR(_) => false
  case ALT(r1, r2) => nullable(r1) || nullable(r2)
  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
  case _ => false
}


val tokList : List[Char] = "aab".toList
var input : List[Char] = tokList







def lexRecurse(zs: List[Zipper], index: Int) : List[Zipper] = {
    if(index <  input.length )
        lexRecurse(zs.flatMap(z => derive(index, z) ), index + 1)
    else 
        zs
}

def lex(r: Rexp, s: String) : List[Zipper] = {
    input = s.toList
    
    lexRecurse(init_zipper(r)::Nil,  0)
}



implicit def charlist2rexp(s: List[Char]): Rexp = s match {
    case Nil => ONE
    case c::Nil => CHAR(c)
    case c::cs => SEQ(CHAR(c), charlist2rexp(cs))
}
implicit def string2Rexp(s: String) : Rexp = charlist2rexp(s.toList)

implicit def RexpOps(r: Rexp) = new {
    def | (s: Rexp) = ALT(r, s)
    def ~ (s: Rexp) = SEQ(r, s)
    def % = STAR(r)
}

implicit def stringOps(s: String) = new {
    def | (r: Rexp) = ALT(s, r)
    def | (r: String) = ALT(s, r)
    def ~ (r: Rexp) = SEQ(s, r)
    def ~ (r: String) = SEQ(s, r)
    def % = STAR(s)

}

//derive(0, init_zipper(re0))

// println(re1s.length)
// mems.foreach(a => println(a))
// val re1sPlugged = plug_all(re1s)
// re1sPlugged.foreach(zipper => {
//                         println(zipper); 
//                         println("delimit") 
//                         })
                
// mems.clear()
// println(mems)
// println(re0)
// val re2s = lex(re0, "aab")
// val re2sPlugged = plug_all(re2s)
// re2sPlugged.foreach(v => {
//         val Sequ(Empty, vp) = v
//         println(vp)
//     }
// )
// val re0 = SEQ(ALT(CHAR('a'), SEQ(CHAR('a'),CHAR('a'))), 
// ALT(SEQ(CHAR('a'), CHAR('b')), SEQ(CHAR('b'), CHAR('c')) )
// )

// val (rgraph, re0root) = makeGraphFromObject(re0)
// val asciir = GraphLayout.renderGraph(rgraph)
// println("printing out re0")
// println(asciir)
// val re1s = lex(re0, "aabc")
 
def actualZipperSize(zs: List[Zipper]) : Int = zs match {
    case Nil => 0
    case z::zs1 => countParents(z._2) + actualZipperSize(zs1)
}

def countParents(m: Mem) : Int = {
    m.parents.map(c => countGrandParents(c)).sum
}

def countGrandParents(c: Ctx) : Int = {
    c match {
        case RootC => 1
        case SeqC(m, pr, unp) => countParents(m)
        case AltC(m, w) => countParents(m)
        case StarC(m, _, _) => countParents(m)
    }
}
//(a+aa)* \a --> (1 + a)(a+aa)* --> (a+aa)* + (1+a)(a+aa)*

//a(a+aa)* + 1(a+aa)* + (a+aa)*

//a~(a + aa)* \a -> 1 ~ (a + aa)* 
//va <-----> m --> SeqC(m1, pr, "a") --> AltC(m4, Right)--> StarC(m2, vs, "a" + "aa") --> SeqC(m) ---> Root
//           ^
//           ---> AltC(m4, Left) 
def zipBackToRegex(c: Ctx, r: Rexp = ONE) : Rexp = {
    c match {
        case RootC => r
        case SeqC(m, pr, Nil) => zipBackToRegex(m.parents.head, r)
        case SeqC(m, pr, unp::Nil) => zipBackToRegex(m.parents.head, SEQ(r, unp))
        case AltC(m, w) => zipBackToRegex(m.parents.head, r)
        case StarC(m, vs, r0) => zipBackToRegex(m.parents.head, SEQ(r, STAR(r0)))
    }
}

def zipperSimp(z: Zipper) : Unit = z match {
    case (v, m) => //m.parents = m.parents.distinctBy(c => zipBackToRegex(c))
}

def distinctCtx(cs: List[Ctx]) : List[Ctx] = cs.distinctBy(c => zipBackToRegex(c))


def simpleCtxDisplay(c: Ctx, indent : Int = 0) : String = c match {
    case SeqC(m, pr, unp) => "Sc[m:" ++ printMem(m, indent + 1) ++ 
        "pr:" ++ pr.map(v => shortValOutput(v)).mkString(", ") ++ " unp:" ++ unp.map(r2 => shortRexpOutput(r2)).mkString(", ") ++ "]"
    case AltC(m, w) =>
        w(Empty) match {
            case Left(_) => s"Ac(m:${printMem(m, indent + 1)}, Left(_))"
            case Right(_) => s"Ac(m:${printMem(m, indent + 1)}, Right(_))"
            case Empty => s"Ac(m:${printMem(m, indent + 1)}, id)"
        } 
    case StarC(m, vs, r0) => s"StarC[m:" ++ printMem(m, indent + 1) ++ 
        "vs:" ++ vs.map(v => shortValOutput(v)).mkString(", ") ++ " r0: " ++ shortRexpOutput(r0)
    case RootC => "Root"
    //case AL1(r) => s"(+${shortRexpOutput(r)})"
    //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
    //case RTOP => "RTOP"
  }

def printMem(m: Mem, indent: Int = 0) : String = {
   "M(par:" ++
   m.parents.map(c => simpleCtxDisplay(c, indent + 1)).mkString("(",",", ")")  ++
  (", results:")  ++
  (for(iRexp <- m.result) 
    yield iRexp match {case (p: Int, v: Val) => s"$p->${shortValOutput(v)}"}
  ).mkString("(",",", ")")  ++ 
   ")" 
}

def shortRexpOutput(r: Rexp) : String = r match {
    case CHAR(c) => c.toString
    case ONE => "1"
    case ZERO => "0"
    case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
    case ALT(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
    case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
    //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
    case RTOP => "RTOP"
  }

def shortValOutput(v: Val) : String = v match {
    case Left(v) => "L(" ++ shortValOutput(v) ++ ")"
    case Right(v) => "R(" ++ shortValOutput(v) ++ ")"
    case Empty => "e"
    case Sequ(v1, v2) => "[" ++ shortValOutput(v1) ++ "~" ++ shortValOutput(v2) ++ "]"
    case Chr(a) => a.toString
    case Stars(vs) => "Stars" ++ vs.map(shortValOutput(_)).mkString("[", ",", "]")
    case _ => "???"
}


//def crystalizeZipper

for(i <- 1 to 2) {
    mems.clear()
println(s"there are $i number of a's")
val re1 = ("a" | "ab" ) ~ ("c" | "bc")//(("a" | "b") ~ "c" | ("b" | "e") ~ "c" ) ~ "f"
val re1Lexed = lex(re1, "abc")

//drawZippers(re1Lexed)
println("size of actual zipper (including memoized contexts")
println(actualZipperSize(re1Lexed))
//println(re1Lexed)
//re1Lexed.foreach(zipperSimp(_))
//println(actualZipperSize(re1S))
val re1resPlugged = plug_all(re1Lexed)
//println(actualZipperSize(re1Lexed))

println("value extracted")
re1resPlugged.foreach(v => {
        val Sequ(Empty, vp) = v
        println(vp)
}
)


}

mems.clear()
val re2 = SEQ(ONE, "a")
val re2res = lex(re2, "a")
//lex(1~a, "a") --> lexRecurse((1v, m  (SeqC(m (RootC, Nil), Nil, [1~a] ) )))


println(re2res)

val re2resPlugged = plug_all(re2res)
re2resPlugged.foreach(v => {
        val Sequ(Empty, vp) = v
        println(vp)
}
)

// println("remaining regex")
// println(re1ss.flatMap(z => zipBackMem(z._2)))


// val re1ssPlugged = plug_all(re1ss)
// println("each of the values")
// re1ssPlugged.foreach(v => {
//         //val Sequ(Empty, vp) = v
//         //println(vp)
//         println(v)
//     }
// )
// println(mems.size)
//println(mems)
//mems.map({case (ir, m) => if (ir._1 == 1 && ir._2 == CHAR('b')) println(printMem(m)) })
// println("Mkeps + inj:")
// println(
//     mems.get((0, re1)) match {
//         case Some(m) => printMem(m)
//         case None => ""
//     }
//     )

// println(re1sPlugged)
//drawZippers(re1s, plugOrNot = false)
// re1s.foreach{
//   re1 => 
//   {

//     drawZippers(derive(1, re1), plugOrNot = true)

//   }
// }