lex_blex_Frankensteined.scala
changeset 148 c8ef391dd6f7
parent 109 79f347cb8b4d
child 150 b51d34113d47
equal deleted inserted replaced
147:dfcf3fa58d7f 148:c8ef391dd6f7
   135     case Sequ(v1, v2) => code(v1) ::: code(v2)
   135     case Sequ(v1, v2) => code(v1) ::: code(v2)
   136     case Stars(Nil) => Z::Nil
   136     case Stars(Nil) => Z::Nil
   137     case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
   137     case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
   138   }
   138   }
   139 
   139 
   140 
   140   //note that left and right are not recorded
       
   141   //although they guide the retrival
       
   142   //in contrast with stars 
   141   def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
   143   def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
   142     case (AONE(bs), Empty) => bs
   144     case (AONE(bs), Empty) => bs
   143     case (ACHAR(bs, c), Chr(d)) => bs
   145     case (ACHAR(bs, c), Chr(d)) => bs
   144     case (AALTS(bs, a::Nil), v) => bs ++ retrieve(a, v)
   146     case (AALTS(bs, a::Nil), v) => bs ++ retrieve(a, v)
   145     case (AALTS(bs, as), Left(v)) => bs ++ retrieve(as.head,v)
   147     case (AALTS(bs, as), Left(v)) => bs ++ retrieve(as.head,v)
   254     case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   256     case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
   255     case STAR(r) => Stars(Nil)
   257     case STAR(r) => Stars(Nil)
   256     case RECD(x, r) => Rec(x, mkeps(r))
   258     case RECD(x, r) => Rec(x, mkeps(r))
   257     //case PLUS(r) => Stars(List(mkeps(r)))
   259     //case PLUS(r) => Stars(List(mkeps(r)))
   258   }
   260   }
   259 
   261   def haschr(r: Rexp) : Boolean = r match {
       
   262     case CHAR(c) => true
       
   263     case STAR(r0) => haschr(r0)
       
   264     case SEQ(r1, r2) => haschr(r1) && nullable(r2) || haschr(r2) && nullable(r1)
       
   265     case ALTS(List(r1, r2)) => haschr(r1) || haschr(r2)
       
   266     case ONE => false
       
   267     case ZERO => false
       
   268   }
       
   269   def haschar(r: Rexp, c: Char) : Boolean = r match {
       
   270     case CHAR(d) => if(c == d) true else false
       
   271     case SEQ(r1, r2) => if(haschar(r1, c) && nullable(r2)) true else if(haschar(r2, c) && nullable(r1) ) true else false
       
   272     case STAR(r) => if(haschar(r, c)) true else false
       
   273     case ALTS(List(r1, r2)) => if(haschar(r1, c) || haschar(r2, c)) true else false
       
   274     case ONE => false
       
   275     case ZERO => false 
       
   276   }
       
   277   def mkchr(r: Rexp) : Val = r match {
       
   278     case SEQ(r1, r2) => 
       
   279       if(haschr(r1) && nullable(r2)) Sequ(mkchr(r1), mkeps(r2)) 
       
   280       else if(nullable(r1) && haschr(r2)) Sequ(mkeps(r1), mkchr(r2))
       
   281       else throw new Exception(r.toString)
       
   282     case ALTS(List(r1, r2)) => if (haschr(r1)) Left(mkchr(r1)) else Right(mkchr(r2))
       
   283     case STAR(r0) => Stars(List(mkchr(r0)))
       
   284     case CHAR(c) => Chr(c)
       
   285     case _ => throw new Exception("the regex you gave me can't make a char")
       
   286   }
       
   287   def mkchar(r: Rexp, c: Char) : Val = r match {
       
   288     case CHAR(d) => Chr(c)//if(c == d)  Chr(c) else error
       
   289     case ALTS(List(r1, r2)) =>
       
   290       if (haschar(r1, c)) Left(mkchar(r1, c)) else Right(mkchar(r2, c))
       
   291 
       
   292     case SEQ(r1, r2) => {if(haschar(r1, c)) Sequ(mkchar(r1, c), mkeps(r2)) else Sequ(mkeps(r1), mkchar(r2, c))}
       
   293     case STAR(r) =>Stars(List(mkchar(r, c)))
       
   294   }
   260   def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   295   def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
   261     case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   296     case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   262     case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   297     case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
   263     case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   298     case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
   264     case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   299     case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
   266     case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
   301     case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
   267     case (CHAR(_), Empty) => Chr(c) 
   302     case (CHAR(_), Empty) => Chr(c) 
   268     case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   303     case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
   269     //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   304     //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
   270   }
   305   }
       
   306   def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = v match {
       
   307     case Stars(vs) => r match {//vs
       
   308       case ASTAR(bs, r0) => inj(     erase(r), c, Sequ(mkeps(erase(bder(c, r0))), v)     )
       
   309       case ASEQ(bs, r1, r2) => inj(erase(r), c, Sequ(mkeps(erase(bder(c, r1))), v) )
       
   310     }
       
   311     case Sequ(v1, v2) => r match {
       
   312       case ASTAR(bs, r0) => inj(erase(r), c, Sequ(mkchar(erase(bder(c, r0)), c), v2))
       
   313       case _ => v
       
   314     }
       
   315     case _ => v
       
   316   }
       
   317   /*def gen_rect(r: Rexp) : Val => Val = {
       
   318     //lingqi
       
   319     //buyao sanle 
       
   320     val r1 = bsimp(r)
       
   321 
       
   322   }
       
   323   def fuzzy_inj(r: ARexp, c: Char, v: Val) : Val = {
       
   324     val f = gen_rect(r)
       
   325     val vo = f(v)
       
   326     inj(r, c, vo)
       
   327   }*/
   271   def lex(r: Rexp, s: List[Char]) : Val = s match {
   328   def lex(r: Rexp, s: List[Char]) : Val = s match {
   272     case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   329     case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
   273     case c::cs => inj(r, c, lex(der(c, r), cs))
   330     case c::cs => inj(r, c, lex(der(c, r), cs))
   274   }
   331   }
   275 
   332 
   451 //an overly complex version
   508 //an overly complex version
   452 /*
   509 /*
   453     if(rel_dist >0){//the regex we are dealing with is not what v points at
   510     if(rel_dist >0){//the regex we are dealing with is not what v points at
   454       rs match{
   511       rs match{
   455         case Nil => throw new Exception("Trying to simplify a non-existent value")
   512         case Nil => throw new Exception("Trying to simplify a non-existent value")
   456         case AZERO :: rs1 => flats_vsimp(rs1, rel_dist - 1, remove(v))
   513         case AZERO :: rs1 => right_shift(rs1, rel_dist - 1, remove(v))
   457         case AALTS(bs, rs1) :: rs2 => flats_vsimp(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
   514         case AALTS(bs, rs1) :: rs2 => right_shift(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
   458         case r1 :: rs2 => flats_vsimp(rs2, rel_dist - 1, v)
   515         case r1 :: rs2 => right_shift(rs2, rel_dist - 1, v)
   459       }
   516       }
   460     }
   517     }
   461     else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself"
   518     else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself"
   462       rs match{//r1 cannot be zero
   519       rs match{//r1 cannot be zero
   463         AALTS(bs, rs1) :: rs2 => flats_vsimp(  )
   520         AALTS(bs, rs1) :: rs2 => right_shift(  )
   464         AZERO::rs2 => throw new Exception("Value corresponds to zero")
   521         AZERO::rs2 => throw new Exception("Value corresponds to zero")
   465         r1::rs2 => flats_vsimp(rs2, rel_dist - 1, v)
   522         r1::rs2 => right_shift(rs2, rel_dist - 1, v)
   466       }
   523       }
   467 
   524 
   468     }
   525     }
   469     else{
   526     else{
   470 
   527 
   471     }
   528     }
   472     */
   529     */
   473   def flats_vsimp(rs: List[ARexp], position: Int): Int = (rs, position) match {
   530   //gives how much the regex at i has shifted right after flatten(on a list of simplified regexes)
       
   531   def right_shift(rs: List[ARexp], i: Int): Int = (rs, i) match {
   474     case (_, 0) => 0
   532     case (_, 0) => 0
   475     case (Nil, _) => 0
   533     case (Nil, _) => 0
   476     case (AZERO :: rs1, _) => flats_vsimp(rs1, position - 1) - 1
   534     case (AZERO :: rs1, _) => right_shift(rs1, i - 1) - 1
   477     case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + flats_vsimp(rs2, position - 1)
   535     case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + right_shift(rs2, i - 1)
   478     case (r1 :: rs2, _) => flats_vsimp(rs2, position - 1)
   536     case (r1 :: rs2, _) => right_shift(rs2, i - 1)
   479   }
   537   }
   480   def rflats(rs: List[Rexp]): List[Rexp] = rs match {
   538   def rflats(rs: List[Rexp]): List[Rexp] = rs match {
   481     case Nil => Nil
   539     case Nil => Nil
   482     case ZERO :: rs1 => rflats(rs1)
   540     case ZERO :: rs1 => rflats(rs1)
   483     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
   541     case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
   535     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   593     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   536     case r => r   
   594     case r => r   
   537   }
   595   }
   538   //only print at the top level
   596   //only print at the top level
   539 
   597 
       
   598   //find_pos returns the index of a certain regex in a list of regex
       
   599   //the leftmost regex is given the index 0 and the regex next to it
       
   600   //is given 1 and so on 
       
   601   //it needs the value to point out which regex it wants to get index of
       
   602   //find_pos_aux does essentially the same thing as find_pos, except that
       
   603   //it receives an alts instead of a list of regular expressions
   540   def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
   604   def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
   541     case (v, r::Nil) => 0
   605     case (v, r::Nil) => 0
   542     case (Right(v), r::rs) => find_pos(v, rs) + 1
   606     case (Right(v), r::rs) => find_pos(v, rs) + 1
   543     case (Left(v), r::rs) => 0
   607     case (Left(v), r::rs) => 0
   544     //case (v, _) => 0
   608     //case (v, _) => 0
   557   def simple_end(v: Val): Boolean = v match {
   621   def simple_end(v: Val): Boolean = v match {
   558     case Left(v) => return false
   622     case Left(v) => return false
   559     case Right(v) => return simple_end(v)
   623     case Right(v) => return simple_end(v)
   560     case v => return true
   624     case v => return true
   561   }
   625   }
   562   def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
   626 
   563     val rsbh = rs.slice(position + 1, rs.length)
   627   //tells if rs[i] after flatten is at the right end 
       
   628   def isend(v: Val, rs: List[ARexp], i: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
       
   629     val rsbh = rs.slice(i + 1, rs.length)
   564     val out_end = if(flats(rsbh) == Nil) true else false
   630     val out_end = if(flats(rsbh) == Nil) true else false
   565     val inner_end = simple_end(v)
   631     val inner_end = simple_end(v)
   566     inner_end && out_end
   632     inner_end && out_end
   567   }
   633   }
       
   634   //get the coat of v and wears it on vs
   568   def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
   635   def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
   569     case (Right(v), r::Nil) => Right(vs)
   636     case (Right(v), r::Nil) => Right(vs)
   570     case (Left(v), r::rs) => Left(vs) 
   637     case (Left(v), r::rs) => Left(vs) 
   571     case (Right(v), r::rs) => Right(get_coat(v, rs, vs))
   638     case (Right(v), r::rs) => Right(get_coat(v, rs, vs))
   572   }
   639   }
       
   640   //coat does the job of "coating" a value
       
   641   //given the number of right outside it
   573   def coat(v: Val, i: Int) : Val = i match {
   642   def coat(v: Val, i: Int) : Val = i match {
   574     case 0 => v
   643     case 0 => v
   575     case i => coat(Right(v), i - 1)
   644     case i => coat(Right(v), i - 1)
       
   645   }
       
   646   def decoat(v:Val, i: Int) : Val = i match {
       
   647     case 0 => v
       
   648     case i => v match {
       
   649       case Right(v0) => decoat(v0, i - 1)
       
   650       case _ => throw new Exception("bad args decoat")
       
   651     }
       
   652   }
       
   653   //given a regex, and a value, return the rectification function for how to rebuild the original value from the simplified value
       
   654   
       
   655   def vunsimp(r: ARexp, v: Val) : Val => Val = (r, v) match {
       
   656     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
       
   657         case ((AZERO, _), (_, _) )=> throw new Exception("bad arguments")
       
   658         case ((_, _), (AZERO, _)) => throw new Exception("bad arguments")
       
   659         case ((AONE(bs2), v1s) , (r2s, v2s)) => (vtails => Sequ(v1, vunsimp(r2, v2)(vtails)))  //(fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
       
   660         case ((r1s, v1s), (r2s, v2s)) => (vsmall => vsmall match {
       
   661           case Sequ(v1small, v2small) => Sequ(vunsimp(r1, v1)(v1small), vunsimp(r2, v2)(v2small))
       
   662           case _ => {
       
   663             println(vsmall) ;
       
   664             throw new Exception("bad arguments sequ")
       
   665           }
       
   666           //(ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
       
   667         })
       
   668     }
       
   669     case (AALTS(bs1, rs), v) => {
       
   670       val init_ind = find_pos(v, rs)
       
   671       val rightend1 = if(init_ind + 1 == rs.length) true else false
       
   672       val inner_rectfunct = vunsimp(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
       
   673       val vpointr = bsimp2(rs(init_ind), remove(v, rs))
       
   674       val target_vs = vpointr._2
       
   675       val target_rs = vpointr._1
       
   676 
       
   677       val rs_simp = rs.map(bsimp)
       
   678       val target_vs_kernel = target_rs match {
       
   679         case AALTS(bs2, rs2) => remove(target_vs, rs2)//remove the secondary layer of left and right
       
   680         case r => target_vs
       
   681       }
       
   682       val target_vs_outerlayers = target_rs match {
       
   683         case AALTS(bs2, rs2) => find_pos(target_vs, rs2)//remove the secondary layer of left and right
       
   684         case r => 0
       
   685       }
       
   686       val rightend2 = target_rs match {
       
   687         case AALTS(bs2, rs2) => if(find_pos(target_vs, rs2) == rs2.length - 1) true else false
       
   688         case r => false
       
   689       }
       
   690       val isalts = target_rs match {
       
   691         case AALTS(bs2, rs2) =>  true 
       
   692         case r => false
       
   693       }
       
   694 
       
   695 
       
   696       val flat_res = flats(rs_simp)
       
   697       val flats_shit1 = right_shift(rs_simp, init_ind)
       
   698       val flats_shift2 = find_pos_aux(target_vs, target_rs)
       
   699       val flats_shift =  flats_shit1 + flats_shift2//right_shift used to be called flats_vsimp
       
   700       val new_ind = init_ind + flats_shift
       
   701 
       
   702       val dist_res = distinctBy(flat_res, erase)
       
   703       val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
       
   704 
       
   705       val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
       
   706       {
       
   707         coat(target_vs_kernel, front_part.length - 1)
       
   708       }
       
   709       else{
       
   710         coat(Left(target_vs_kernel), front_part.length - 1)
       
   711       }
       
   712       if(rightend1){
       
   713         if(rightend2){
       
   714           kernel_coated: Val => 
       
   715           decoat(kernel_coated, front_part.length - 1) match {
       
   716             case Left(vk) => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
       
   717             case vk => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)
       
   718           }
       
   719         }
       
   720         else{
       
   721           kernel_coated: Val => 
       
   722           decoat(kernel_coated, front_part.length - 1) match {
       
   723             case Left(vk) => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind) 
       
   724               else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts
       
   725             case vk => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind)
       
   726               else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)
       
   727           }
       
   728         }
       
   729       }
       
   730       else{
       
   731         if(rightend2){
       
   732           kernel_coated: Val => 
       
   733           decoat(kernel_coated, front_part.length - 1) match {
       
   734             case Left(vk) => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
       
   735             case vk => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
       
   736           } 
       
   737         }
       
   738         else{
       
   739           kernel_coated: Val => 
       
   740           decoat(kernel_coated, front_part.length - 1) match {
       
   741             case Left(vk) => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
       
   742               else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts
       
   743             case vk => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind)
       
   744               else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)
       
   745           }     
       
   746         }
       
   747 
       
   748       }
       
   749 
       
   750     }
       
   751     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
       
   752     case (r, v) => (v => v)
   576   }
   753   }
   577   //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
   754   //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
   578   def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
   755   def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
   579     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
   756     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
   580         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   757         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   581         case ((_, _), (AZERO, _)) => (AZERO, undefined)
   758         case ((_, _), (AZERO, _)) => (AZERO, undefined)
   582         case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
   759         case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
   583         case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
   760         case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
   584     }
   761     }
   585     case (AALTS(bs1, rs), v) => {
   762     case (AALTS(bs1, rs), v) => {
   586       //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf
       
   587       val init_ind = find_pos(v, rs)
   763       val init_ind = find_pos(v, rs)
   588       val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
   764       val vpointr = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
   589       //println(vs)
   765       val target_sv = vpointr._2
       
   766       val target_sr = vpointr._1
       
   767 
   590       val rs_simp = rs.map(bsimp)
   768       val rs_simp = rs.map(bsimp)
   591       val vs_kernel = rs_simp(init_ind) match {
   769       val target_vs_kernel = target_sr match {
   592         case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right
   770         case AALTS(bs2, rs2) => remove(target_sv, rs2)//if rs(init_ind) after simp is also an alts, we remove the R(....L(v)) outside v
   593         case r => vs._2
   771         case r => target_sv
   594       }
   772       }
   595       val flat_res = flats(rs_simp)
   773       val flat_res = flats(rs_simp)
   596       val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
   774       val flats_shift1 = right_shift(rs_simp, init_ind)
   597       val r_s = rs_simp(init_ind)//or vs._1
   775       val flats_base = find_pos_aux(target_sv, target_sr)
   598       val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
   776       val flats_shift =  flats_shift1 + flats_base//right_shift used to be called flats_vsimp
   599       val new_ind = init_ind + shift
   777       val new_ind = init_ind + flats_shift
   600       val vf = coat(vs_for_coating, new_ind)
   778 
   601       //flats2 returns a list of regex and a single v
       
   602       //now |- vf: ALTS(bs1, flat_res)
       
   603       //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
       
   604       val dist_res = distinctBy(flat_res, erase)
   779       val dist_res = distinctBy(flat_res, erase)
   605       val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
   780       val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
   606       //val size_reduction = new_ind + 1 - front_part.length
   781 
   607       val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
   782       val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
   608       {
   783       {
   609         coat(vs_kernel, front_part.length - 1)
   784         coat(target_vs_kernel, front_part.length - 1)
   610       }
   785       }
   611       else{
   786       else{
   612         coat(Left(vs_kernel), front_part.length - 1)
   787         coat(Left(target_vs_kernel), front_part.length - 1)
   613       }
   788       }
   614       //println(vdb)
       
   615       //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value.
       
   616       //the above statement needs verification. but can be left as is now.
       
   617       dist_res match {
   789       dist_res match {
   618         case Nil => (AZERO, undefined)
   790         case Nil => (AZERO, undefined)
   619         case s :: Nil => (fuse(bs1, s), vdb)
   791         case s :: Nil => (fuse(bs1, s), vdb)
   620         case rs => (AALTS(bs1, rs), vdb)
   792         case rs => (AALTS(bs1, rs), vdb)
   621       }
   793       }
   622     }
   794     }
   623     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   795     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   624     case (r, v) => (r, v)  
   796     case (r, v) => (r, v)  
   625   }
   797   }
       
   798   //the below are all residuals from the bsimp2 function 
       
   799         //val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
       
   800         //val vf = coat(vs_for_coating, new_ind)
       
   801       //flats2 returns a list of regex and a single v
       
   802       //now |- vf: ALTS(bs1, flat_res)
       
   803       //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
       
   804             //val size_reduction = new_ind + 1 - front_part.length
   626   def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
   805   def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
   627   /*This version was first intended for returning a function however a value would be simpler.
   806   /*This version was first intended for returning a function however a value would be simpler.
   628   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
   807   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
   629     case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
   808     case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
   630         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   809         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   641         case r => vs
   820         case r => vs
   642       }
   821       }
   643       val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel)
   822       val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel)
   644 
   823 
   645       val r_s = rs_simp[init_ind]
   824       val r_s = rs_simp[init_ind]
   646       val shift = flats_vsimp(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
   825       val shift = right_shift(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
   647       val vf = coat(vs_for_coating, shift + init_ind)
   826       val vf = coat(vs_for_coating, shift + init_ind)
   648 
   827 
   649       val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v
   828       val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v
   650       val dist_res = distinctBy(flat_res, erase)
   829       val dist_res = distinctBy(flat_res, erase)
   651       dist_res match {
   830       dist_res match {
   722     println((end - start)/1.0e9)
   901     println((end - start)/1.0e9)
   723     result
   902     result
   724   }
   903   }
   725   */
   904   */
   726   // main unsimplified lexing function (produces a value)
   905   // main unsimplified lexing function (produces a value)
   727   def blex(r: ARexp, s: List[Char]) : Bits = s match {
   906   def blex(s: List[Char], r: ARexp) : Bits = s match {
   728     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
   907     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
   729     case c::cs => {
   908     case c::cs => {
   730       val der_res = bder(c,r)
   909       val der_res = bder(c,r)
   731       blex(der_res, cs)
   910       blex(cs, der_res)
   732     }
   911     }
   733   }
   912   }
   734 
   913 
   735   def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
   914   def bpre_lexing(r: Rexp, s: String) = blex( s.toList, internalise(r) )
   736   def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
   915   def blexing(s: String, r: Rexp) : Val = decode(r, blex( s.toList, internalise(r) ) )
   737 
   916 
   738   var bder_time = 0L
   917   var bder_time = 0L
   739   var bsimp_time = 0L
   918   var bsimp_time = 0L
   740   var mkepsBC_time = 0L
   919   var mkepsBC_time = 0L
   741   var small_de = 2
   920   var small_de = 2