lex_blex_Frankensteined.scala
changeset 148 c8ef391dd6f7
parent 109 79f347cb8b4d
child 150 b51d34113d47
--- a/lex_blex_Frankensteined.scala	Wed Mar 04 13:25:52 2020 +0000
+++ b/lex_blex_Frankensteined.scala	Fri Apr 10 11:58:11 2020 +0100
@@ -137,7 +137,9 @@
     case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
   }
 
-
+  //note that left and right are not recorded
+  //although they guide the retrival
+  //in contrast with stars 
   def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
     case (AONE(bs), Empty) => bs
     case (ACHAR(bs, c), Chr(d)) => bs
@@ -256,7 +258,40 @@
     case RECD(x, r) => Rec(x, mkeps(r))
     //case PLUS(r) => Stars(List(mkeps(r)))
   }
+  def haschr(r: Rexp) : Boolean = r match {
+    case CHAR(c) => true
+    case STAR(r0) => haschr(r0)
+    case SEQ(r1, r2) => haschr(r1) && nullable(r2) || haschr(r2) && nullable(r1)
+    case ALTS(List(r1, r2)) => haschr(r1) || haschr(r2)
+    case ONE => false
+    case ZERO => false
+  }
+  def haschar(r: Rexp, c: Char) : Boolean = r match {
+    case CHAR(d) => if(c == d) true else false
+    case SEQ(r1, r2) => if(haschar(r1, c) && nullable(r2)) true else if(haschar(r2, c) && nullable(r1) ) true else false
+    case STAR(r) => if(haschar(r, c)) true else false
+    case ALTS(List(r1, r2)) => if(haschar(r1, c) || haschar(r2, c)) true else false
+    case ONE => false
+    case ZERO => false 
+  }
+  def mkchr(r: Rexp) : Val = r match {
+    case SEQ(r1, r2) => 
+      if(haschr(r1) && nullable(r2)) Sequ(mkchr(r1), mkeps(r2)) 
+      else if(nullable(r1) && haschr(r2)) Sequ(mkeps(r1), mkchr(r2))
+      else throw new Exception(r.toString)
+    case ALTS(List(r1, r2)) => if (haschr(r1)) Left(mkchr(r1)) else Right(mkchr(r2))
+    case STAR(r0) => Stars(List(mkchr(r0)))
+    case CHAR(c) => Chr(c)
+    case _ => throw new Exception("the regex you gave me can't make a char")
+  }
+  def mkchar(r: Rexp, c: Char) : Val = r match {
+    case CHAR(d) => Chr(c)//if(c == d)  Chr(c) else error
+    case ALTS(List(r1, r2)) =>
+      if (haschar(r1, c)) Left(mkchar(r1, c)) else Right(mkchar(r2, c))
 
+    case SEQ(r1, r2) => {if(haschar(r1, c)) Sequ(mkchar(r1, c), mkeps(r2)) else Sequ(mkeps(r1), mkchar(r2, c))}
+    case STAR(r) =>Stars(List(mkchar(r, c)))
+  }
   def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
     case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
     case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
@@ -268,6 +303,28 @@
     case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
     //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   }
+  def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = v match {
+    case Stars(vs) => r match {//vs
+      case ASTAR(bs, r0) => inj(     erase(r), c, Sequ(mkeps(erase(bder(c, r0))), v)     )
+      case ASEQ(bs, r1, r2) => inj(erase(r), c, Sequ(mkeps(erase(bder(c, r1))), v) )
+    }
+    case Sequ(v1, v2) => r match {
+      case ASTAR(bs, r0) => inj(erase(r), c, Sequ(mkchar(erase(bder(c, r0)), c), v2))
+      case _ => v
+    }
+    case _ => v
+  }
+  /*def gen_rect(r: Rexp) : Val => Val = {
+    //lingqi
+    //buyao sanle 
+    val r1 = bsimp(r)
+
+  }
+  def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = {
+    val f = gen_rect(r)
+    val vo = f(v)
+    inj(r, c, vo)
+  }*/
   def lex(r: Rexp, s: List[Char]) : Val = s match {
     case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
     case c::cs => inj(r, c, lex(der(c, r), cs))
@@ -453,16 +510,16 @@
     if(rel_dist >0){//the regex we are dealing with is not what v points at
       rs match{
         case Nil => throw new Exception("Trying to simplify a non-existent value")
-        case AZERO :: rs1 => flats_vsimp(rs1, rel_dist - 1, remove(v))
-        case AALTS(bs, rs1) :: rs2 => flats_vsimp(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
-        case r1 :: rs2 => flats_vsimp(rs2, rel_dist - 1, v)
+        case AZERO :: rs1 => right_shift(rs1, rel_dist - 1, remove(v))
+        case AALTS(bs, rs1) :: rs2 => right_shift(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
+        case r1 :: rs2 => right_shift(rs2, rel_dist - 1, v)
       }
     }
     else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself"
       rs match{//r1 cannot be zero
-        AALTS(bs, rs1) :: rs2 => flats_vsimp(  )
+        AALTS(bs, rs1) :: rs2 => right_shift(  )
         AZERO::rs2 => throw new Exception("Value corresponds to zero")
-        r1::rs2 => flats_vsimp(rs2, rel_dist - 1, v)
+        r1::rs2 => right_shift(rs2, rel_dist - 1, v)
       }
 
     }
@@ -470,12 +527,13 @@
 
     }
     */
-  def flats_vsimp(rs: List[ARexp], position: Int): Int = (rs, position) match {
+  //gives how much the regex at i has shifted right after flatten(on a list of simplified regexes)
+  def right_shift(rs: List[ARexp], i: Int): Int = (rs, i) match {
     case (_, 0) => 0
     case (Nil, _) => 0
-    case (AZERO :: rs1, _) => flats_vsimp(rs1, position - 1) - 1
-    case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + flats_vsimp(rs2, position - 1)
-    case (r1 :: rs2, _) => flats_vsimp(rs2, position - 1)
+    case (AZERO :: rs1, _) => right_shift(rs1, i - 1) - 1
+    case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + right_shift(rs2, i - 1)
+    case (r1 :: rs2, _) => right_shift(rs2, i - 1)
   }
   def rflats(rs: List[Rexp]): List[Rexp] = rs match {
     case Nil => Nil
@@ -537,6 +595,12 @@
   }
   //only print at the top level
 
+  //find_pos returns the index of a certain regex in a list of regex
+  //the leftmost regex is given the index 0 and the regex next to it
+  //is given 1 and so on 
+  //it needs the value to point out which regex it wants to get index of
+  //find_pos_aux does essentially the same thing as find_pos, except that
+  //it receives an alts instead of a list of regular expressions
   def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
     case (v, r::Nil) => 0
     case (Right(v), r::rs) => find_pos(v, rs) + 1
@@ -559,21 +623,134 @@
     case Right(v) => return simple_end(v)
     case v => return true
   }
-  def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
-    val rsbh = rs.slice(position + 1, rs.length)
+
+  //tells if rs[i] after flatten is at the right end 
+  def isend(v: Val, rs: List[ARexp], i: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
+    val rsbh = rs.slice(i + 1, rs.length)
     val out_end = if(flats(rsbh) == Nil) true else false
     val inner_end = simple_end(v)
     inner_end && out_end
   }
+  //get the coat of v and wears it on vs
   def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
     case (Right(v), r::Nil) => Right(vs)
     case (Left(v), r::rs) => Left(vs) 
     case (Right(v), r::rs) => Right(get_coat(v, rs, vs))
   }
+  //coat does the job of "coating" a value
+  //given the number of right outside it
   def coat(v: Val, i: Int) : Val = i match {
     case 0 => v
     case i => coat(Right(v), i - 1)
   }
+  def decoat(v:Val, i: Int) : Val = i match {
+    case 0 => v
+    case i => v match {
+      case Right(v0) => decoat(v0, i - 1)
+      case _ => throw new Exception("bad args decoat")
+    }
+  }
+  //given a regex, and a value, return the rectification function for how to rebuild the original value from the simplified value
+  
+  def vunsimp(r: ARexp, v: Val) : Val => Val = (r, v) match {
+    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
+        case ((AZERO, _), (_, _) )=> throw new Exception("bad arguments")
+        case ((_, _), (AZERO, _)) => throw new Exception("bad arguments")
+        case ((AONE(bs2), v1s) , (r2s, v2s)) => (vtails => Sequ(v1, vunsimp(r2, v2)(vtails)))  //(fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
+        case ((r1s, v1s), (r2s, v2s)) => (vsmall => vsmall match {
+          case Sequ(v1small, v2small) => Sequ(vunsimp(r1, v1)(v1small), vunsimp(r2, v2)(v2small))
+          case _ => {
+            println(vsmall) ;
+            throw new Exception("bad arguments sequ")
+          }
+          //(ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
+        })
+    }
+    case (AALTS(bs1, rs), v) => {
+      val init_ind = find_pos(v, rs)
+      val rightend1 = if(init_ind + 1 == rs.length) true else false
+      val inner_rectfunct = vunsimp(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
+      val vpointr = bsimp2(rs(init_ind), remove(v, rs))
+      val target_vs = vpointr._2
+      val target_rs = vpointr._1
+
+      val rs_simp = rs.map(bsimp)
+      val target_vs_kernel = target_rs match {
+        case AALTS(bs2, rs2) => remove(target_vs, rs2)//remove the secondary layer of left and right
+        case r => target_vs
+      }
+      val target_vs_outerlayers = target_rs match {
+        case AALTS(bs2, rs2) => find_pos(target_vs, rs2)//remove the secondary layer of left and right
+        case r => 0
+      }
+      val rightend2 = target_rs match {
+        case AALTS(bs2, rs2) => if(find_pos(target_vs, rs2) == rs2.length - 1) true else false
+        case r => false
+      }
+      val isalts = target_rs match {
+        case AALTS(bs2, rs2) =>  true 
+        case r => false
+      }
+
+
+      val flat_res = flats(rs_simp)
+      val flats_shit1 = right_shift(rs_simp, init_ind)
+      val flats_shift2 = find_pos_aux(target_vs, target_rs)
+      val flats_shift =  flats_shit1 + flats_shift2//right_shift used to be called flats_vsimp
+      val new_ind = init_ind + flats_shift
+
+      val dist_res = distinctBy(flat_res, erase)
+      val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
+
+      val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
+      {
+        coat(target_vs_kernel, front_part.length - 1)
+      }
+      else{
+        coat(Left(target_vs_kernel), front_part.length - 1)
+      }
+      if(rightend1){
+        if(rightend2){
+          kernel_coated: Val => 
+          decoat(kernel_coated, front_part.length - 1) match {
+            case Left(vk) => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
+            case vk => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)
+          }
+        }
+        else{
+          kernel_coated: Val => 
+          decoat(kernel_coated, front_part.length - 1) match {
+            case Left(vk) => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind) 
+              else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
+            case vk => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind)
+              else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)
+          }
+        }
+      }
+      else{
+        if(rightend2){
+          kernel_coated: Val => 
+          decoat(kernel_coated, front_part.length - 1) match {
+            case Left(vk) => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
+            case vk => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
+          } 
+        }
+        else{
+          kernel_coated: Val => 
+          decoat(kernel_coated, front_part.length - 1) match {
+            case Left(vk) => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
+              else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
+            case vk => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
+              else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
+          }     
+        }
+
+      }
+
+    }
+    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
+    case (r, v) => (v => v)
+  }
   //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
   def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
@@ -583,37 +760,32 @@
         case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
     }
     case (AALTS(bs1, rs), v) => {
-      //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf
       val init_ind = find_pos(v, rs)
-      val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
-      //println(vs)
+      val vpointr = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
+      val target_sv = vpointr._2
+      val target_sr = vpointr._1
+
       val rs_simp = rs.map(bsimp)
-      val vs_kernel = rs_simp(init_ind) match {
-        case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right
-        case r => vs._2
+      val target_vs_kernel = target_sr match {
+        case AALTS(bs2, rs2) => remove(target_sv, rs2)//if rs(init_ind) after simp is also an alts, we remove the R(....L(v)) outside v
+        case r => target_sv
       }
       val flat_res = flats(rs_simp)
-      val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
-      val r_s = rs_simp(init_ind)//or vs._1
-      val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
-      val new_ind = init_ind + shift
-      val vf = coat(vs_for_coating, new_ind)
-      //flats2 returns a list of regex and a single v
-      //now |- vf: ALTS(bs1, flat_res)
-      //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
+      val flats_shift1 = right_shift(rs_simp, init_ind)
+      val flats_base = find_pos_aux(target_sv, target_sr)
+      val flats_shift =  flats_shift1 + flats_base//right_shift used to be called flats_vsimp
+      val new_ind = init_ind + flats_shift
+
       val dist_res = distinctBy(flat_res, erase)
       val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
-      //val size_reduction = new_ind + 1 - front_part.length
+
       val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
       {
-        coat(vs_kernel, front_part.length - 1)
+        coat(target_vs_kernel, front_part.length - 1)
       }
       else{
-        coat(Left(vs_kernel), front_part.length - 1)
+        coat(Left(target_vs_kernel), front_part.length - 1)
       }
-      //println(vdb)
-      //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value.
-      //the above statement needs verification. but can be left as is now.
       dist_res match {
         case Nil => (AZERO, undefined)
         case s :: Nil => (fuse(bs1, s), vdb)
@@ -623,6 +795,13 @@
     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
     case (r, v) => (r, v)  
   }
+  //the below are all residuals from the bsimp2 function 
+        //val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
+        //val vf = coat(vs_for_coating, new_ind)
+      //flats2 returns a list of regex and a single v
+      //now |- vf: ALTS(bs1, flat_res)
+      //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
+            //val size_reduction = new_ind + 1 - front_part.length
   def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
   /*This version was first intended for returning a function however a value would be simpler.
   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
@@ -643,7 +822,7 @@
       val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel)
 
       val r_s = rs_simp[init_ind]
-      val shift = flats_vsimp(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
+      val shift = right_shift(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
       val vf = coat(vs_for_coating, shift + init_ind)
 
       val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v
@@ -724,16 +903,16 @@
   }
   */
   // main unsimplified lexing function (produces a value)
-  def blex(r: ARexp, s: List[Char]) : Bits = s match {
+  def blex(s: List[Char], r: ARexp) : Bits = s match {
     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
     case c::cs => {
       val der_res = bder(c,r)
-      blex(der_res, cs)
+      blex(cs, der_res)
     }
   }
 
-  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
-  def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
+  def bpre_lexing(r: Rexp, s: String) = blex( s.toList, internalise(r) )
+  def blexing(s: String, r: Rexp) : Val = decode(r, blex( s.toList, internalise(r) ) )
 
   var bder_time = 0L
   var bsimp_time = 0L