446 } |
447 } |
447 } |
448 } |
448 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
449 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
449 case r => r |
450 case r => r |
450 } |
451 } |
451 def find_pos(v: Val, rs: List[Rexp]): Int = (v, rs) match{ |
452 def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{ |
452 case (Right(v), r::Nil) => 1 |
453 case (v, r::Nil) => 0 |
|
454 case (Right(v), r::rs) => find_pos(v, rs) + 1 |
453 case (Left(v), r::rs) => 0 |
455 case (Left(v), r::rs) => 0 |
454 case (Right(v), r::rs) => find_pos(v, rs) + 1 |
456 //case (v, _) => 0 |
455 case (v, _) => 0 |
457 } |
456 } |
458 def find_pos_aux(v: Val, r: ARexp): Int = r match { |
457 def remove(v: Val, rs: List[Rexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right |
459 case AALTS(bs, rs) => find_pos(v, rs) |
458 case (Right(v), r::Nil) => v |
460 case r => 0 |
|
461 } |
|
462 def remove(v: Val, rs: List[ARexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right |
|
463 //we have to use r to detect the bound of nested L/Rs |
|
464 case (v, r::Nil) => v |
|
465 case (Right(v), r::rs) => remove(v, rs) |
459 case (Left(v), r::rs) => v |
466 case (Left(v), r::rs) => v |
460 case (Right(v), r::rs) => remove(v, rs) |
467 //case (v, r::Nil) => v |
461 } |
468 } |
462 def simple_end(v: Val): Boolean = v match { |
469 def simple_end(v: Val): Boolean = v match { |
463 case Left(v) => return false |
470 case Left(v) => return false |
464 case Right(v) => return simple_end(v) |
471 case Right(v) => return simple_end(v) |
465 case v => return true |
472 case v => return true |
466 } |
473 } |
467 def isend(v: Val, rs: List[ARexp], position: Int): Boolean = { |
474 def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2 |
468 val rsbh = rs.slice(position, rs.length) |
475 val rsbh = rs.slice(position + 1, rs.length) |
469 val out_end = if(flats(rsbh) == Nil) true else false |
476 val out_end = if(flats(rsbh) == Nil) true else false |
470 val inner_end = simple_end(v) |
477 val inner_end = simple_end(v) |
471 inner_end && out_end |
478 inner_end && out_end |
472 } |
479 } |
473 def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself) |
480 def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself) |
477 } |
484 } |
478 def coat(v: Val, i: Int) : Val = i match { |
485 def coat(v: Val, i: Int) : Val = i match { |
479 case 0 => v |
486 case 0 => v |
480 case i => coat(Right(v), i - 1) |
487 case i => coat(Right(v), i - 1) |
481 } |
488 } |
482 /* |
489 //This version takes a regex and a value, return a simplified regex and its corresponding simplified value |
|
490 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{ |
|
491 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
|
492 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
|
493 case ((_, _), (AZERO, _)) => (AZERO, undefined) |
|
494 case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx. |
|
495 case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
|
496 } |
|
497 case (AALTS(bs1, rs), v) => { |
|
498 //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf |
|
499 val init_ind = find_pos(v, rs) |
|
500 //println(rs) |
|
501 //println(v) |
|
502 val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to match the regx rs[i] |
|
503 //println(vs) |
|
504 val rs_simp = rs.map(bsimp) |
|
505 val vs_kernel = rs_simp(init_ind) match { |
|
506 case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right |
|
507 case r => vs._2 |
|
508 } |
|
509 val flat_res = flats(rs_simp) |
|
510 //println(rs_simp) |
|
511 //println(flat_res) |
|
512 //println(init_ind) |
|
513 val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel) |
|
514 //println(vs_for_coating) |
|
515 val r_s = rs_simp(init_ind)//or vs._1 |
|
516 val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind)) |
|
517 //println(shift) |
|
518 val new_ind = init_ind + shift |
|
519 //println("new ind:") |
|
520 //println(new_ind) |
|
521 val vf = coat(vs_for_coating, new_ind) |
|
522 //println("vf:") |
|
523 //println(vf) |
|
524 //flats2 returns a list of regex and a single v |
|
525 //now |- vf: ALTS(bs1, flat_res) |
|
526 |
|
527 //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb |
|
528 val dist_res = distinctBy(flat_res, erase) |
|
529 val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase) |
|
530 //val size_reduction = new_ind + 1 - front_part.length |
|
531 //println(flat_res.length) |
|
532 //println(dist_res) |
|
533 //println(front_part) |
|
534 val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list |
|
535 { |
|
536 coat(vs_kernel, front_part.length - 1) |
|
537 } |
|
538 else{ |
|
539 coat(Left(vs_kernel), front_part.length - 1) |
|
540 } |
|
541 //println(vdb) |
|
542 //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value. |
|
543 //the above statement needs verification. but can be left as is now. |
|
544 dist_res match { |
|
545 case Nil => (AZERO, undefined) |
|
546 case s :: Nil => (fuse(bs1, s), vdb) |
|
547 case rs => (AALTS(bs1, rs), vdb) |
|
548 } |
|
549 } |
|
550 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
|
551 case (r, v) => (r, v) |
|
552 } |
|
553 def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2 |
|
554 /*This version was first intended for returning a function however a value would be simpler. |
483 def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{ |
555 def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{ |
484 case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match { |
556 case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match { |
485 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
557 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
486 case ((_, _), (AZERO, _)) => (AZERO, undefined) |
558 case ((_, _), (AZERO, _)) => (AZERO, undefined) |
487 case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } ) |
559 case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } ) |