lex_blex_Frankensteined.scala
changeset 17 3241b1e71633
parent 16 c51178fa85fe
child 59 8ff7b7508824
equal deleted inserted replaced
16:c51178fa85fe 17:3241b1e71633
   382       case Nil => Nil
   382       case Nil => Nil
   383       case AZERO :: rs1 => flats(rs1)
   383       case AZERO :: rs1 => flats(rs1)
   384       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   384       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
   385       case r1 :: rs2 => r1 :: flats(rs2)
   385       case r1 :: rs2 => r1 :: flats(rs2)
   386   }
   386   }
       
   387   /*
   387   def remove(v: Val): Val = v match{
   388   def remove(v: Val): Val = v match{
   388     case Right(v1) => v1
   389     case Right(v1) => v1
   389     case Left(v1) => v1
   390     case Left(v1) => v1
   390     case _ => throw new Exception("Not removable")
   391     case _ => throw new Exception("Not removable")
   391   }
   392   }*/
   392   def augment(v: Val, i: Int): Val = if(i > 1) augment(Right(v), i - 1) else Right(v)
   393   def augment(v: Val, i: Int): Val = if(i > 1) augment(Right(v), i - 1) else Right(v)
   393 //an overly complex version
   394 //an overly complex version
   394 /*
   395 /*
   395     if(rel_dist >0){//the regex we are dealing with is not what v points at
   396     if(rel_dist >0){//the regex we are dealing with is not what v points at
   396       rs match{
   397       rs match{
   446       }
   447       }
   447     }
   448     }
   448     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   449     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
   449     case r => r
   450     case r => r
   450   }
   451   }
   451   def find_pos(v: Val, rs: List[Rexp]): Int = (v, rs) match{
   452   def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
   452     case (Right(v), r::Nil) => 1
   453     case (v, r::Nil) => 0
       
   454     case (Right(v), r::rs) => find_pos(v, rs) + 1
   453     case (Left(v), r::rs) => 0
   455     case (Left(v), r::rs) => 0
   454     case (Right(v), r::rs) => find_pos(v, rs) + 1
   456     //case (v, _) => 0
   455     case (v, _) => 0
   457   }
   456   }
   458   def find_pos_aux(v: Val, r: ARexp): Int = r match {
   457   def remove(v: Val, rs: List[Rexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
   459     case AALTS(bs, rs) => find_pos(v, rs)
   458     case (Right(v), r::Nil) => v 
   460     case r => 0
       
   461   }
       
   462   def remove(v: Val, rs: List[ARexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
       
   463     //we have to use r to detect the bound of nested L/Rs
       
   464     case (v, r::Nil) => v
       
   465     case (Right(v), r::rs) => remove(v, rs) 
   459     case (Left(v), r::rs) => v 
   466     case (Left(v), r::rs) => v 
   460     case (Right(v), r::rs) => remove(v, rs)
   467     //case (v, r::Nil) => v
   461   }
   468   }
   462   def simple_end(v: Val): Boolean = v match {
   469   def simple_end(v: Val): Boolean = v match {
   463     case Left(v) => return false
   470     case Left(v) => return false
   464     case Right(v) => return simple_end(v)
   471     case Right(v) => return simple_end(v)
   465     case v => return true
   472     case v => return true
   466   }
   473   }
   467   def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {
   474   def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
   468     val rsbh = rs.slice(position, rs.length)
   475     val rsbh = rs.slice(position + 1, rs.length)
   469     val out_end = if(flats(rsbh) == Nil) true else false
   476     val out_end = if(flats(rsbh) == Nil) true else false
   470     val inner_end = simple_end(v)
   477     val inner_end = simple_end(v)
   471     inner_end && out_end
   478     inner_end && out_end
   472   }
   479   }
   473   def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
   480   def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
   477   }
   484   }
   478   def coat(v: Val, i: Int) : Val = i match {
   485   def coat(v: Val, i: Int) : Val = i match {
   479     case 0 => v
   486     case 0 => v
   480     case i => coat(Right(v), i - 1)
   487     case i => coat(Right(v), i - 1)
   481   }
   488   }
   482   /*
   489   //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
       
   490   def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
       
   491     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
       
   492         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
       
   493         case ((_, _), (AZERO, _)) => (AZERO, undefined)
       
   494         case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
       
   495         case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
       
   496     }
       
   497     case (AALTS(bs1, rs), v) => {
       
   498       //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf
       
   499       val init_ind = find_pos(v, rs)
       
   500       //println(rs)
       
   501       //println(v)
       
   502       val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
       
   503       //println(vs)
       
   504       val rs_simp = rs.map(bsimp)
       
   505       val vs_kernel = rs_simp(init_ind) match {
       
   506         case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right
       
   507         case r => vs._2
       
   508       }
       
   509       val flat_res = flats(rs_simp)
       
   510       //println(rs_simp)
       
   511       //println(flat_res)
       
   512       //println(init_ind)
       
   513       val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
       
   514       //println(vs_for_coating)
       
   515       val r_s = rs_simp(init_ind)//or vs._1
       
   516       val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
       
   517       //println(shift)
       
   518       val new_ind = init_ind + shift
       
   519       //println("new ind:")
       
   520       //println(new_ind)
       
   521       val vf = coat(vs_for_coating, new_ind)
       
   522       //println("vf:")
       
   523       //println(vf)
       
   524       //flats2 returns a list of regex and a single v
       
   525       //now |- vf: ALTS(bs1, flat_res)
       
   526 
       
   527       //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
       
   528       val dist_res = distinctBy(flat_res, erase)
       
   529       val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
       
   530       //val size_reduction = new_ind + 1 - front_part.length
       
   531       //println(flat_res.length)
       
   532       //println(dist_res)
       
   533       //println(front_part)
       
   534       val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
       
   535       {
       
   536         coat(vs_kernel, front_part.length - 1)
       
   537       }
       
   538       else{
       
   539         coat(Left(vs_kernel), front_part.length - 1)
       
   540       }
       
   541       //println(vdb)
       
   542       //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value.
       
   543       //the above statement needs verification. but can be left as is now.
       
   544       dist_res match {
       
   545         case Nil => (AZERO, undefined)
       
   546         case s :: Nil => (fuse(bs1, s), vdb)
       
   547         case rs => (AALTS(bs1, rs), vdb)
       
   548       }
       
   549     }
       
   550     //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
       
   551     case (r, v) => (r, v)  
       
   552   }
       
   553   def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
       
   554   /*This version was first intended for returning a function however a value would be simpler.
   483   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
   555   def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
   484     case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
   556     case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
   485         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   557         case ((AZERO, _), (_, _) )=> (AZERO, undefined)
   486         case ((_, _), (AZERO, _)) => (AZERO, undefined)
   558         case ((_, _), (AZERO, _)) => (AZERO, undefined)
   487         case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } )
   559         case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } )
   888 case class Sequ(v1: Val, v2: Val) extends Val
   960 case class Sequ(v1: Val, v2: Val) extends Val
   889 case class Left(v: Val) extends Val
   961 case class Left(v: Val) extends Val
   890 case class Right(v: Val) extends Val
   962 case class Right(v: Val) extends Val
   891 case class Stars(vs: List[Val]) extends Val
   963 case class Stars(vs: List[Val]) extends Val
   892 case class Rec(x: String, v: Val) extends Val
   964 case class Rec(x: String, v: Val) extends Val
       
   965 case object undefined extends Val
   893 //case class Pos(i: Int, v: Val) extends Val
   966 //case class Pos(i: Int, v: Val) extends Val
   894 case object Prd extends Val
   967 case object Prd extends Val