254 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
256 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
255 case STAR(r) => Stars(Nil) |
257 case STAR(r) => Stars(Nil) |
256 case RECD(x, r) => Rec(x, mkeps(r)) |
258 case RECD(x, r) => Rec(x, mkeps(r)) |
257 //case PLUS(r) => Stars(List(mkeps(r))) |
259 //case PLUS(r) => Stars(List(mkeps(r))) |
258 } |
260 } |
259 |
261 def haschr(r: Rexp) : Boolean = r match { |
|
262 case CHAR(c) => true |
|
263 case STAR(r0) => haschr(r0) |
|
264 case SEQ(r1, r2) => haschr(r1) && nullable(r2) || haschr(r2) && nullable(r1) |
|
265 case ALTS(List(r1, r2)) => haschr(r1) || haschr(r2) |
|
266 case ONE => false |
|
267 case ZERO => false |
|
268 } |
|
269 def haschar(r: Rexp, c: Char) : Boolean = r match { |
|
270 case CHAR(d) => if(c == d) true else false |
|
271 case SEQ(r1, r2) => if(haschar(r1, c) && nullable(r2)) true else if(haschar(r2, c) && nullable(r1) ) true else false |
|
272 case STAR(r) => if(haschar(r, c)) true else false |
|
273 case ALTS(List(r1, r2)) => if(haschar(r1, c) || haschar(r2, c)) true else false |
|
274 case ONE => false |
|
275 case ZERO => false |
|
276 } |
|
277 def mkchr(r: Rexp) : Val = r match { |
|
278 case SEQ(r1, r2) => |
|
279 if(haschr(r1) && nullable(r2)) Sequ(mkchr(r1), mkeps(r2)) |
|
280 else if(nullable(r1) && haschr(r2)) Sequ(mkeps(r1), mkchr(r2)) |
|
281 else throw new Exception(r.toString) |
|
282 case ALTS(List(r1, r2)) => if (haschr(r1)) Left(mkchr(r1)) else Right(mkchr(r2)) |
|
283 case STAR(r0) => Stars(List(mkchr(r0))) |
|
284 case CHAR(c) => Chr(c) |
|
285 case _ => throw new Exception("the regex you gave me can't make a char") |
|
286 } |
|
287 def mkchar(r: Rexp, c: Char) : Val = r match { |
|
288 case CHAR(d) => Chr(c)//if(c == d) Chr(c) else error |
|
289 case ALTS(List(r1, r2)) => |
|
290 if (haschar(r1, c)) Left(mkchar(r1, c)) else Right(mkchar(r2, c)) |
|
291 |
|
292 case SEQ(r1, r2) => {if(haschar(r1, c)) Sequ(mkchar(r1, c), mkeps(r2)) else Sequ(mkeps(r1), mkchar(r2, c))} |
|
293 case STAR(r) =>Stars(List(mkchar(r, c))) |
|
294 } |
260 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
295 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
261 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
296 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
262 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
297 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
263 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
298 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
264 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
299 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
557 def simple_end(v: Val): Boolean = v match { |
621 def simple_end(v: Val): Boolean = v match { |
558 case Left(v) => return false |
622 case Left(v) => return false |
559 case Right(v) => return simple_end(v) |
623 case Right(v) => return simple_end(v) |
560 case v => return true |
624 case v => return true |
561 } |
625 } |
562 def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2 |
626 |
563 val rsbh = rs.slice(position + 1, rs.length) |
627 //tells if rs[i] after flatten is at the right end |
|
628 def isend(v: Val, rs: List[ARexp], i: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2 |
|
629 val rsbh = rs.slice(i + 1, rs.length) |
564 val out_end = if(flats(rsbh) == Nil) true else false |
630 val out_end = if(flats(rsbh) == Nil) true else false |
565 val inner_end = simple_end(v) |
631 val inner_end = simple_end(v) |
566 inner_end && out_end |
632 inner_end && out_end |
567 } |
633 } |
|
634 //get the coat of v and wears it on vs |
568 def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself) |
635 def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself) |
569 case (Right(v), r::Nil) => Right(vs) |
636 case (Right(v), r::Nil) => Right(vs) |
570 case (Left(v), r::rs) => Left(vs) |
637 case (Left(v), r::rs) => Left(vs) |
571 case (Right(v), r::rs) => Right(get_coat(v, rs, vs)) |
638 case (Right(v), r::rs) => Right(get_coat(v, rs, vs)) |
572 } |
639 } |
|
640 //coat does the job of "coating" a value |
|
641 //given the number of right outside it |
573 def coat(v: Val, i: Int) : Val = i match { |
642 def coat(v: Val, i: Int) : Val = i match { |
574 case 0 => v |
643 case 0 => v |
575 case i => coat(Right(v), i - 1) |
644 case i => coat(Right(v), i - 1) |
|
645 } |
|
646 def decoat(v:Val, i: Int) : Val = i match { |
|
647 case 0 => v |
|
648 case i => v match { |
|
649 case Right(v0) => decoat(v0, i - 1) |
|
650 case _ => throw new Exception("bad args decoat") |
|
651 } |
|
652 } |
|
653 //given a regex, and a value, return the rectification function for how to rebuild the original value from the simplified value |
|
654 |
|
655 def vunsimp(r: ARexp, v: Val) : Val => Val = (r, v) match { |
|
656 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
|
657 case ((AZERO, _), (_, _) )=> throw new Exception("bad arguments") |
|
658 case ((_, _), (AZERO, _)) => throw new Exception("bad arguments") |
|
659 case ((AONE(bs2), v1s) , (r2s, v2s)) => (vtails => Sequ(v1, vunsimp(r2, v2)(vtails))) //(fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx. |
|
660 case ((r1s, v1s), (r2s, v2s)) => (vsmall => vsmall match { |
|
661 case Sequ(v1small, v2small) => Sequ(vunsimp(r1, v1)(v1small), vunsimp(r2, v2)(v2small)) |
|
662 case _ => { |
|
663 println(vsmall) ; |
|
664 throw new Exception("bad arguments sequ") |
|
665 } |
|
666 //(ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
|
667 }) |
|
668 } |
|
669 case (AALTS(bs1, rs), v) => { |
|
670 val init_ind = find_pos(v, rs) |
|
671 val rightend1 = if(init_ind + 1 == rs.length) true else false |
|
672 val inner_rectfunct = vunsimp(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to match the regx rs[i] |
|
673 val vpointr = bsimp2(rs(init_ind), remove(v, rs)) |
|
674 val target_vs = vpointr._2 |
|
675 val target_rs = vpointr._1 |
|
676 |
|
677 val rs_simp = rs.map(bsimp) |
|
678 val target_vs_kernel = target_rs match { |
|
679 case AALTS(bs2, rs2) => remove(target_vs, rs2)//remove the secondary layer of left and right |
|
680 case r => target_vs |
|
681 } |
|
682 val target_vs_outerlayers = target_rs match { |
|
683 case AALTS(bs2, rs2) => find_pos(target_vs, rs2)//remove the secondary layer of left and right |
|
684 case r => 0 |
|
685 } |
|
686 val rightend2 = target_rs match { |
|
687 case AALTS(bs2, rs2) => if(find_pos(target_vs, rs2) == rs2.length - 1) true else false |
|
688 case r => false |
|
689 } |
|
690 val isalts = target_rs match { |
|
691 case AALTS(bs2, rs2) => true |
|
692 case r => false |
|
693 } |
|
694 |
|
695 |
|
696 val flat_res = flats(rs_simp) |
|
697 val flats_shit1 = right_shift(rs_simp, init_ind) |
|
698 val flats_shift2 = find_pos_aux(target_vs, target_rs) |
|
699 val flats_shift = flats_shit1 + flats_shift2//right_shift used to be called flats_vsimp |
|
700 val new_ind = init_ind + flats_shift |
|
701 |
|
702 val dist_res = distinctBy(flat_res, erase) |
|
703 val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase) |
|
704 |
|
705 val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list |
|
706 { |
|
707 coat(target_vs_kernel, front_part.length - 1) |
|
708 } |
|
709 else{ |
|
710 coat(Left(target_vs_kernel), front_part.length - 1) |
|
711 } |
|
712 if(rightend1){ |
|
713 if(rightend2){ |
|
714 kernel_coated: Val => |
|
715 decoat(kernel_coated, front_part.length - 1) match { |
|
716 case Left(vk) => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts |
|
717 case vk => coat(inner_rectfunct(coat(vk, target_vs_outerlayers)), init_ind) |
|
718 } |
|
719 } |
|
720 else{ |
|
721 kernel_coated: Val => |
|
722 decoat(kernel_coated, front_part.length - 1) match { |
|
723 case Left(vk) => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind) |
|
724 else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind)//add clause: if rs_simp(init_ind) is an alts |
|
725 case vk => if(isalts) coat(inner_rectfunct(coat(Left(vk), target_vs_outerlayers)), init_ind) |
|
726 else coat(inner_rectfunct(coat((vk), target_vs_outerlayers)), init_ind) |
|
727 } |
|
728 } |
|
729 } |
|
730 else{ |
|
731 if(rightend2){ |
|
732 kernel_coated: Val => |
|
733 decoat(kernel_coated, front_part.length - 1) match { |
|
734 case Left(vk) => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts |
|
735 case vk => coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind) |
|
736 } |
|
737 } |
|
738 else{ |
|
739 kernel_coated: Val => |
|
740 decoat(kernel_coated, front_part.length - 1) match { |
|
741 case Left(vk) => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind) |
|
742 else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind)//add clause: if rs_simp(init_ind) is an alts |
|
743 case vk => if(isalts) coat(Left(inner_rectfunct(coat(Left(vk), target_vs_outerlayers))), init_ind) |
|
744 else coat(Left(inner_rectfunct(coat(vk, target_vs_outerlayers))), init_ind) |
|
745 } |
|
746 } |
|
747 |
|
748 } |
|
749 |
|
750 } |
|
751 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
|
752 case (r, v) => (v => v) |
576 } |
753 } |
577 //This version takes a regex and a value, return a simplified regex and its corresponding simplified value |
754 //This version takes a regex and a value, return a simplified regex and its corresponding simplified value |
578 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{ |
755 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{ |
579 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
756 case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match { |
580 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
757 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
581 case ((_, _), (AZERO, _)) => (AZERO, undefined) |
758 case ((_, _), (AZERO, _)) => (AZERO, undefined) |
582 case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx. |
759 case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx. |
583 case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
760 case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s), Sequ(v1s, v2s)) |
584 } |
761 } |
585 case (AALTS(bs1, rs), v) => { |
762 case (AALTS(bs1, rs), v) => { |
586 //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf |
|
587 val init_ind = find_pos(v, rs) |
763 val init_ind = find_pos(v, rs) |
588 val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to match the regx rs[i] |
764 val vpointr = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to match the regx rs[i] |
589 //println(vs) |
765 val target_sv = vpointr._2 |
|
766 val target_sr = vpointr._1 |
|
767 |
590 val rs_simp = rs.map(bsimp) |
768 val rs_simp = rs.map(bsimp) |
591 val vs_kernel = rs_simp(init_ind) match { |
769 val target_vs_kernel = target_sr match { |
592 case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right |
770 case AALTS(bs2, rs2) => remove(target_sv, rs2)//if rs(init_ind) after simp is also an alts, we remove the R(....L(v)) outside v |
593 case r => vs._2 |
771 case r => target_sv |
594 } |
772 } |
595 val flat_res = flats(rs_simp) |
773 val flat_res = flats(rs_simp) |
596 val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel) |
774 val flats_shift1 = right_shift(rs_simp, init_ind) |
597 val r_s = rs_simp(init_ind)//or vs._1 |
775 val flats_base = find_pos_aux(target_sv, target_sr) |
598 val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind)) |
776 val flats_shift = flats_shift1 + flats_base//right_shift used to be called flats_vsimp |
599 val new_ind = init_ind + shift |
777 val new_ind = init_ind + flats_shift |
600 val vf = coat(vs_for_coating, new_ind) |
778 |
601 //flats2 returns a list of regex and a single v |
|
602 //now |- vf: ALTS(bs1, flat_res) |
|
603 //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb |
|
604 val dist_res = distinctBy(flat_res, erase) |
779 val dist_res = distinctBy(flat_res, erase) |
605 val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase) |
780 val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase) |
606 //val size_reduction = new_ind + 1 - front_part.length |
781 |
607 val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list |
782 val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list |
608 { |
783 { |
609 coat(vs_kernel, front_part.length - 1) |
784 coat(target_vs_kernel, front_part.length - 1) |
610 } |
785 } |
611 else{ |
786 else{ |
612 coat(Left(vs_kernel), front_part.length - 1) |
787 coat(Left(target_vs_kernel), front_part.length - 1) |
613 } |
788 } |
614 //println(vdb) |
|
615 //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value. |
|
616 //the above statement needs verification. but can be left as is now. |
|
617 dist_res match { |
789 dist_res match { |
618 case Nil => (AZERO, undefined) |
790 case Nil => (AZERO, undefined) |
619 case s :: Nil => (fuse(bs1, s), vdb) |
791 case s :: Nil => (fuse(bs1, s), vdb) |
620 case rs => (AALTS(bs1, rs), vdb) |
792 case rs => (AALTS(bs1, rs), vdb) |
621 } |
793 } |
622 } |
794 } |
623 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
795 //case ASTAR(bs, r) => ASTAR(bs, bsimp(r)) |
624 case (r, v) => (r, v) |
796 case (r, v) => (r, v) |
625 } |
797 } |
|
798 //the below are all residuals from the bsimp2 function |
|
799 //val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel) |
|
800 //val vf = coat(vs_for_coating, new_ind) |
|
801 //flats2 returns a list of regex and a single v |
|
802 //now |- vf: ALTS(bs1, flat_res) |
|
803 //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb |
|
804 //val size_reduction = new_ind + 1 - front_part.length |
626 def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2 |
805 def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2 |
627 /*This version was first intended for returning a function however a value would be simpler. |
806 /*This version was first intended for returning a function however a value would be simpler. |
628 def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{ |
807 def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{ |
629 case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match { |
808 case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match { |
630 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |
809 case ((AZERO, _), (_, _) )=> (AZERO, undefined) |