lex_blex_Frankensteined.scala
changeset 17 3241b1e71633
parent 16 c51178fa85fe
child 59 8ff7b7508824
--- a/lex_blex_Frankensteined.scala	Wed May 08 22:09:59 2019 +0100
+++ b/lex_blex_Frankensteined.scala	Tue Jun 25 18:56:52 2019 +0100
@@ -384,11 +384,12 @@
       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       case r1 :: rs2 => r1 :: flats(rs2)
   }
+  /*
   def remove(v: Val): Val = v match{
     case Right(v1) => v1
     case Left(v1) => v1
     case _ => throw new Exception("Not removable")
-  }
+  }*/
   def augment(v: Val, i: Int): Val = if(i > 1) augment(Right(v), i - 1) else Right(v)
 //an overly complex version
 /*
@@ -448,24 +449,30 @@
     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case r => r
   }
-  def find_pos(v: Val, rs: List[Rexp]): Int = (v, rs) match{
-    case (Right(v), r::Nil) => 1
-    case (Left(v), r::rs) => 0
+  def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
+    case (v, r::Nil) => 0
     case (Right(v), r::rs) => find_pos(v, rs) + 1
-    case (v, _) => 0
+    case (Left(v), r::rs) => 0
+    //case (v, _) => 0
+  }
+  def find_pos_aux(v: Val, r: ARexp): Int = r match {
+    case AALTS(bs, rs) => find_pos(v, rs)
+    case r => 0
   }
-  def remove(v: Val, rs: List[Rexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
-    case (Right(v), r::Nil) => v 
+  def remove(v: Val, rs: List[ARexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
+    //we have to use r to detect the bound of nested L/Rs
+    case (v, r::Nil) => v
+    case (Right(v), r::rs) => remove(v, rs) 
     case (Left(v), r::rs) => v 
-    case (Right(v), r::rs) => remove(v, rs)
+    //case (v, r::Nil) => v
   }
   def simple_end(v: Val): Boolean = v match {
     case Left(v) => return false
     case Right(v) => return simple_end(v)
     case v => return true
   }
-  def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {
-    val rsbh = rs.slice(position, rs.length)
+  def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
+    val rsbh = rs.slice(position + 1, rs.length)
     val out_end = if(flats(rsbh) == Nil) true else false
     val inner_end = simple_end(v)
     inner_end && out_end
@@ -479,7 +486,72 @@
     case 0 => v
     case i => coat(Right(v), i - 1)
   }
-  /*
+  //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
+  def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
+    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
+        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
+        case ((_, _), (AZERO, _)) => (AZERO, undefined)
+        case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
+        case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
+    }
+    case (AALTS(bs1, rs), v) => {
+      //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf
+      val init_ind = find_pos(v, rs)
+      //println(rs)
+      //println(v)
+      val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
+      //println(vs)
+      val rs_simp = rs.map(bsimp)
+      val vs_kernel = rs_simp(init_ind) match {
+        case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right
+        case r => vs._2
+      }
+      val flat_res = flats(rs_simp)
+      //println(rs_simp)
+      //println(flat_res)
+      //println(init_ind)
+      val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
+      //println(vs_for_coating)
+      val r_s = rs_simp(init_ind)//or vs._1
+      val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
+      //println(shift)
+      val new_ind = init_ind + shift
+      //println("new ind:")
+      //println(new_ind)
+      val vf = coat(vs_for_coating, new_ind)
+      //println("vf:")
+      //println(vf)
+      //flats2 returns a list of regex and a single v
+      //now |- vf: ALTS(bs1, flat_res)
+
+      //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
+      val dist_res = distinctBy(flat_res, erase)
+      val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
+      //val size_reduction = new_ind + 1 - front_part.length
+      //println(flat_res.length)
+      //println(dist_res)
+      //println(front_part)
+      val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
+      {
+        coat(vs_kernel, front_part.length - 1)
+      }
+      else{
+        coat(Left(vs_kernel), front_part.length - 1)
+      }
+      //println(vdb)
+      //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value.
+      //the above statement needs verification. but can be left as is now.
+      dist_res match {
+        case Nil => (AZERO, undefined)
+        case s :: Nil => (fuse(bs1, s), vdb)
+        case rs => (AALTS(bs1, rs), vdb)
+      }
+    }
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+    case (r, v) => (r, v)  
+  }
+  def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
+  /*This version was first intended for returning a function however a value would be simpler.
   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
     case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
@@ -890,5 +962,6 @@
 case class Right(v: Val) extends Val
 case class Stars(vs: List[Val]) extends Val
 case class Rec(x: String, v: Val) extends Val
+case object undefined extends Val
 //case class Pos(i: Int, v: Val) extends Val
 case object Prd extends Val