            case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc)
          }
        case x => x::distinctBy2(xs, res::acc)
      }
  }
}

// A more aggressive version of distinctBy2: when a not-yet-seen term is a sequence whose
// first child is an alternative, the alternatives (each paired with the common tail r2)
// are themselves deduplicated against the accumulator before the sequence is rebuilt.
def distinctBy3(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match {
  case Nil => Nil
  case (x::xs) => {
    val res = erase(x)
    if (acc.contains(res))
      distinctBy3(xs, acc)
    else
      x match {
        case ASEQ(bs0, AALTS(bs1, rs), r2) =>
          // deduplicate the alternatives, each viewed as a sequence with the common tail r2
          val newTerms = distinctBy3(rs.map(r1 => ASEQ(Nil, r1, r2)), acc)
          // recover the first children of the surviving terms
          val rsPrime = projectFirstChild(newTerms)
          newTerms match {
            case Nil => distinctBy3(xs, acc)
            case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy3(xs, erase(t)::acc)
            case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy3(xs, newTerms.map(erase(_)):::acc)
          }
        case x => x::distinctBy3(xs, res::acc)
      }
  }
}

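// projectFirstChild is used above but lies outside this extract. Since the terms handed
// to it are built as ASEQ(Nil, r1, r2) in distinctBy3, a plausible definition (an
// assumption, not necessarily the one used here) simply collects the first child of
// each sequence:
//   def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match {
//     case ASEQ(_, r1, _)::rest => r1::projectFirstChild(rest)
//     case Nil => Nil
//   }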
|
// Splits a regular expression into a list of alternation-free terms: alternatives are
// flattened, and a sequence distributes over the alternatives of its first child.
def breakIntoTerms(r: Rexp) : List[Rexp] = r match {
  case SEQ(r1, r2) => breakIntoTerms(r1).map(r11 => SEQ(r11, r2))
  case ALTS(r1, r2) => breakIntoTerms(r1) ::: breakIntoTerms(r2)
  case _ => r::Nil
}

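// For example, with the constructors above:
//   breakIntoTerms(ALTS(SEQ(CHAR('a'), CHAR('b')), CHAR('c')))
//     ==> List(SEQ(CHAR('a'), CHAR('b')), CHAR('c'))
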
def prettyRexp(r: Rexp) : String = r match {
  case STAR(r0) => s"${prettyRexp(r0)}*"
  case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2)
  // ... (remaining cases and the definitions that follow are elided)
}

def blexing_simp(r: Rexp, s: String) : Val = {
  val bit_code = blex_simp(internalise(r), s.toList)
  decode(r, bit_code)
}

def strong_blexing_simp(r: Rexp, s: String) : Val = {
  decode(r, strong_blex_simp(internalise(r), s.toList))
}

|
def strong_blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
  case Nil => {
    if (bnullable(r)) {
      //println(asize(r))
      val bits = mkepsBC(r)
      bits
    }
    else
      throw new Exception("Not matched")
  }
  case c::cs => {
    val der_res = bder(c, r)
    val simp_res = strongBsimp(der_res)
    strong_blex_simp(simp_res, cs)
  }
}
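
// blex_simp (called by blexing_simp above) lies outside this extract; presumably it
// mirrors strong_blex_simp, with the ordinary bsimp in place of strongBsimp. A sketch,
// under that assumption:
//   def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
//     case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
//     case c::cs => blex_simp(bsimp(bder(c, r)), cs)
//   }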


def bders_simp(s: List[Char], r: ARexp) : ARexp = s match {
  // ... (cases elided, together with the definitions that follow, up to the tail of strongDB)
      }
      case rPrime => x::strongDB(xs, erase(x)::acc1, acc2)
    }
  }
}
|

// true iff r is a sequence built entirely from single characters
def allCharSeq(r: Rexp) : Boolean = r match {
  case CHAR(c) => true
  case SEQ(r1, r2) => allCharSeq(r1) && allCharSeq(r2)
  case _ => false
}

// concatenates the characters of an all-character sequence into a plain string
def flattenSeq(r: Rexp) : String = r match {
  case CHAR(c) => c.toString
  case SEQ(r1, r2) => flattenSeq(r1) ++ flattenSeq(r2)
  case _ => throw new Error("flatten unflattenable rexp")
}
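
// For example:  flattenSeq(SEQ(CHAR('a'), SEQ(CHAR('b'), CHAR('c')))) ==> "abc"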

def shortRexpOutput(r: Rexp) : String = r match {
  case CHAR(c) => c.toString
  case ONE => "1"
  case ZERO => "0"
  case SEQ(r1, r2) if (allCharSeq(r)) => flattenSeq(r) //"[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
  case SEQ(r1, r2) => "[" ++ shortRexpOutput(r1) ++ "~" ++ shortRexpOutput(r2) ++ "]"
  case ALTS(r1, r2) => "(" ++ shortRexpOutput(r1) ++ "+" ++ shortRexpOutput(r2) ++ ")"
  case STAR(r) => "[" ++ shortRexpOutput(r) ++ "]*"
  //case STAR(r) => "STAR(" ++ shortRexpOutput(r) ++ ")"
  //case RTOP => "RTOP"
  // ... (remaining cases and the definitions that follow are elided)
}
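
// Examples of the notation produced above (using the constructors shown earlier):
//   shortRexpOutput(SEQ(CHAR('a'), CHAR('b')))             ==> "ab"        (all-character sequences are flattened)
//   shortRexpOutput(SEQ(CHAR('a'), ALTS(CHAR('b'), ONE)))  ==> "[a~(b+1)]"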

def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2)
// All implementations of partial derivatives that involve set union are potentially buggy,
// because they do not include the original regular expression before taking its partial
// derivatives; so far only pderas has been fixed.
// pderas: repeated application of partial derivatives to the newest set of pders,
// keeping the original terms.
def pderas(t: Set[Rexp], d: Int): Set[Rexp] =
  if (d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t
  else lfs(t).map(mon => mon._2) ++ t
def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r) + 1)
def awidth(r: Rexp) : Int = r match {
  case CHAR(c) => 1
  case SEQ(r1, r2) => awidth(r1) + awidth(r2)
  case ALTS(r1, r2) => awidth(r1) + awidth(r2)
  case ONE => 0
  // ... (remaining cases elided)
}
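
// Small sanity check (a sketch, assuming lf/lfs compute Antimirov-style linear forms):
// the only partial-derivative terms reachable from CHAR('a'), together with the term
// itself, are CHAR('a') and ONE, so the following should hold:
//   assert(pderUNIV(CHAR('a')) == Set(CHAR('a'), ONE))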

def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] =
  ss.foldLeft(Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc)



// @arg(doc = "small tests")
val STARREG = ((STAR("a") | (STAR("aa")) | STAR("aaa") | STAR("aaaa") | STAR("aaaaa") |
                (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%)
//(((STAR("a") | ( STAR("aaa")) | STAR("aaaaa")).%).%).%

// @main
def small() = {

  // println(lexing_simp(NOTREG, prog0))
  // val v = lex_simp(NOTREG, prog0.toList)
  // println(v)

  // val d = (lex_simp(NOTREG, prog0.toList))
  // println(d)
  val pderSTAR = pderUNIV(STARREG)

  val refSize = pderSTAR.map(size(_)).sum
  println("different partial derivative terms:")
  pderSTAR.foreach(r => r match {
      case SEQ(head, rstar) =>
        println(shortRexpOutput(head) ++ "~STARREG")
      case STAR(rstar) =>
        println("STARREG")
    }
  )
  println("the total number of terms is")
  //println(refSize)
  println(pderSTAR.size)
  for (i <- List(1, 10, 100, 400, 800, 840, 900)) {
    val prog0 = "a" * i
    //println(s"test: $prog0")
    println(s"testing with $i a's")
    val bd = bdersSimp(prog0, STARREG)   // standard DB
    val sbd = bdersSimpS(prog0, STARREG) // strongDB
    val subTerms = breakIntoTerms(erase(sbd))
    val subTermsLarge = breakIntoTerms(erase(bd))

    println(s"subterms of regex with strongDB: ${subTerms.length}, standard DB: ${subTermsLarge.length}")
    println("the number of distinct subterms for bsimp with strongDB and standardDB")
    println(subTerms.distinct.size)
    println(subTermsLarge.distinct.size)

    // println(shortRexpOutput(erase(sbd)))
    // println(shortRexpOutput(erase(bd)))

    println("pdersize, original, strongSimp, simp")
    println(refSize, size(STARREG), asize(sbd), asize(bd))

    // val vres = strong_blexing_simp( STARREG, prog0)
    // println(vres)
  }
  // println(vs.length)
  // println(vs)

  // val prog1 = """read n; write n"""
  // println(s"test: $prog1")
  // println(lexing_simp(WHILE_REGS, prog1))
}

small()

// // Bigger Tests
// //==============

// // escapes strings and prints them out as "", "\n" and so on