415 if (acc.contains(res)) distinctBy(xs, f, acc) |
415 if (acc.contains(res)) distinctBy(xs, f, acc) |
416 else x::distinctBy(xs, f, res::acc) |
416 else x::distinctBy(xs, f, res::acc) |
417 } |
417 } |
418 } |
418 } |
419 |
419 |
|
420 def projectFirstChild(rs: List[ARexp]) : List[ARexp] = rs match { |
|
421 case (ASEQ(bs, r1, r2p)::rs1) => r1::projectFirstChild(rs1) |
|
422 case Nil => Nil |
|
423 } |
|
424 |
|
425 |
|
426 def distinctBy2(xs: List[ARexp], acc: List[Rexp] = Nil): List[ARexp] = xs match { |
|
427 case Nil => Nil |
|
428 case (x::xs) => { |
|
429 val res = erase(x) |
|
430 if(acc.contains(res)) |
|
431 distinctBy2(xs, acc) |
|
432 else |
|
433 x match { |
|
434 case ASEQ(bs0, AALTS(bs1, rs), r2) => |
|
435 val newTerms = distinctBy2(rs.map(r1 => ASEQ(Nil, r1, r2)), acc) |
|
436 val rsPrime = projectFirstChild(newTerms) |
|
437 newTerms match { |
|
438 case Nil => distinctBy2(xs, acc) |
|
439 case t::Nil => ASEQ(bs0, fuse(bs1, rsPrime.head), r2)::distinctBy2(xs, erase(t)::acc) |
|
440 case ts => ASEQ(bs0, AALTS(bs1, rsPrime), r2)::distinctBy2(xs, newTerms.map(erase(_)):::acc) |
|
441 } |
|
442 case x => x::distinctBy2(xs, res::acc) |
|
443 } |
|
444 } |
|
445 } |
|
446 |
420 def prettyRexp(r: Rexp) : String = r match { |
447 def prettyRexp(r: Rexp) : String = r match { |
421 case STAR(r0) => s"${prettyRexp(r0)}*" |
448 case STAR(r0) => s"${prettyRexp(r0)}*" |
422 case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2) |
449 case SEQ(CHAR(c), r2) => c.toString ++ prettyRexp(r2) |
423 case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}" |
450 case SEQ(r1, r2) => s"${prettyRexp(r1)}~${prettyRexp(r2)}" |
424 case CHAR(c) => c.toString |
451 case CHAR(c) => c.toString |
508 if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB) |
541 if (acc.contains(res)) distinctByWithAcc(xs, f, acc, accB) |
509 else distinctByWithAcc(xs, f, res::acc, x::accB) |
542 else distinctByWithAcc(xs, f, res::acc, x::accB) |
510 } |
543 } |
511 } |
544 } |
512 |
545 |
513 |
546 //(r1+r2)r + (r2'+r3)r' ~> (r1+r2)r + (r3)r' (erase r = erase r') (erase r2 = erase r2') |
|
547 //s = s1@s2 \in L R s1 \in L r2 s2 \in L r |
|
548 //s = s3@s4 s3 \in L r1 s4 \in L r |s3| < |s1| \nexists s3' s4' s.t. s3'@s4' = s and s3' \in L r1 s4' \in L r |
|
549 //(r1+r4)r ~> r1r +_usedToBeSeq r4r |
|
550 // | ss7: "erase a01 = erase a02 ∧ (distinctBy as2 erase (set (map erase as1)) = as2p) ⟹ |
|
551 // (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) s↝ |
|
552 // (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)" |
|
553 // | ss8: "erase a01 = erase a02 ∧ (distinctBy [a2] erase (set (map erase as1)) = []) ⟹ |
|
554 // (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@[ASEQ bs a2 (ASTAR bs02 a02)]@rsc) s↝ |
|
555 // (rsa@[ASEQ bs (AALTs bs1 as1) (ASTAR bs01 a01)]@rsb@rsc)" |
|
556 // | ss9: "erase a01 = erase a02 ∧ (distinctBy as2 erase {erase a1} = as2p) ⟹ |
|
557 // (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2) (ASTAR bs02 a02)]@rsc) s↝ |
|
558 // (rsa@[ASEQ bs a1 (ASTAR bs01 a01)]@rsb@[ASEQ bs (AALTs bs2 as2p) (ASTAR bs02 a02)]@rsc)" |
|
559 |
|
560 |
|
561 //# of top-level terms |
|
562 // r1r2 -> r11r2 + r12 -> r21r2 + r22 + r12' -> 4 terms -> 5 terms -> 6.......... |
|
563 // if string long enough --> #| r1r2 \s s | > unbounded? No -> #| r1r2 \s s | < #| pders UNIV r1| + #|pders UNIV r2| <= awidth r1r2 |
|
564 //r* -> r1r* -> r21r* + r22r* -> 4 terms -> 8 terms.............. |
514 def strongDB(xs: List[ARexp], |
565 def strongDB(xs: List[ARexp], |
515 acc1: List[Rexp] = Nil, |
566 acc1: List[Rexp] = Nil, |
516 acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match { |
567 acc2 : List[(List[Rexp], Rexp)] = Nil): List[ARexp] = xs match { |
517 case Nil => Nil |
568 case Nil => Nil |
518 case (x::xs) => |
569 case (x::xs) => |
606 case STAR(r) => 1 + size(r) |
668 case STAR(r) => 1 + size(r) |
607 } |
669 } |
608 |
670 |
609 def asize(a: ARexp) = size(erase(a)) |
671 def asize(a: ARexp) = size(erase(a)) |
610 |
672 |
|
673 //pder related |
|
674 type Mon = (Char, Rexp) |
|
675 type Lin = Set[Mon] |
|
676 |
|
677 def dot_prod(rs: Set[Rexp], r: Rexp): Set[Rexp] = r match { |
|
678 case ZERO => Set() |
|
679 case ONE => rs |
|
680 case r => rs.map((re) => if (re == ONE) r else SEQ(re, r) ) |
|
681 } |
|
682 def cir_prod(l: Lin, t: Rexp): Lin = t match {//remember this Lin is different from the Lin in Antimirov's paper. Here it does not mean the set of all subsets of linear forms that does not contain zero, but rather the type a set of linear forms |
|
683 case ZERO => Set() |
|
684 case ONE => l |
|
685 case t => l.map( m => m._2 match {case ZERO => m case ONE => (m._1, t) case p => (m._1, SEQ(p, t)) } ) |
|
686 } |
|
687 def lf(r: Rexp): Lin = r match { |
|
688 case ZERO => Set() |
|
689 case ONE => Set() |
|
690 case CHAR(f) => { |
|
691 //val Some(c) = alphabet.find(f) |
|
692 Set((f, ONE)) |
|
693 } |
|
694 case ALTS(r1, r2) => { |
|
695 lf(r1 ) ++ lf(r2) |
|
696 } |
|
697 case STAR(r1) => cir_prod(lf(r1), STAR(r1)) //may try infix notation later...... |
|
698 case SEQ(r1, r2) =>{ |
|
699 if (nullable(r1)) |
|
700 cir_prod(lf(r1), r2) ++ lf(r2) |
|
701 else |
|
702 cir_prod(lf(r1), r2) |
|
703 } |
|
704 } |
|
705 def lfs(r: Set[Rexp]): Lin = { |
|
706 r.foldLeft(Set[Mon]())((acc, r) => acc ++ lf(r)) |
|
707 } |
|
708 |
|
709 def pder(x: Char, t: Rexp): Set[Rexp] = { |
|
710 val lft = lf(t) |
|
711 (lft.filter(mon => if(mon._1 == x) true else false)).map(mon => mon._2) |
|
712 } |
|
713 def pders_single(s: List[Char], t: Rexp) : Set[Rexp] = s match { |
|
714 case x::xs => pders(xs, pder(x, t)) |
|
715 case Nil => Set(t) |
|
716 } |
|
717 def pders(s: List[Char], ts: Set[Rexp]) : Set[Rexp] = s match { |
|
718 case x::xs => pders(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
719 case Nil => ts |
|
720 } |
|
721 def pderss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]() )( (acc, s) => pders_single(s, t) ++ acc ) |
|
722 def pdera(t: Rexp): Set[Rexp] = lf(t).map(mon => mon._2) |
|
723 //all implementation of partial derivatives that involve set union are potentially buggy |
|
724 //because they don't include the original regular term before they are pdered. |
|
725 //now only pderas is fixed. |
|
726 def pderas(t: Set[Rexp], d: Int): Set[Rexp] = if(d > 0) pderas(lfs(t).map(mon => mon._2), d - 1) ++ t else lfs(t).map(mon => mon._2) ++ t//repeated application of pderas over the newest set of pders. |
|
727 def pderUNIV(r: Rexp) : Set[Rexp] = pderas(Set(r), awidth(r)) |
|
728 def awidth(r: Rexp) : Int = r match { |
|
729 case CHAR(c) => 1 |
|
730 case SEQ(r1, r2) => awidth(r1) + awidth(r2) |
|
731 case ALTS(r1, r2) => awidth(r1) + awidth(r2) |
|
732 case ONE => 0 |
|
733 case ZERO => 0 |
|
734 case STAR(r) => awidth(r) |
|
735 } |
|
736 //def sigma_lf(l: Set[Mon]) : Rexp = ALTS(l.map(mon => SEQ(CHAR(mon._1),mon._2)).toList) |
|
737 //def sigma(rs: Set[Rexp]) : Rexp = ALTS(rs.toList) |
|
738 def o(r: Rexp) = if (nullable(r)) ONE else ZERO |
|
739 //def nlf(t: Rexp) : Rexp = ALTS(List( o(t), sigma_lf(lf(t)) )) |
|
740 def pdp(x: Char, r: Rexp) : Set[Rexp] = r match { |
|
741 case ZERO => Set[Rexp]() |
|
742 case ONE => Set[Rexp]() |
|
743 case CHAR(f) => if(x == f) Set(ONE) else Set[Rexp]() |
|
744 case ALTS(r1, r2) => pdp(x, r1) ++ pdp(x, r2) |
|
745 case STAR(r1) => pdp(x, r).map(a => SEQ(a, STAR(r1))) |
|
746 case SEQ(a0, b) => if(nullable(a0)) pdp(x, a0).map(a => SEQ(a, b)) ++ pdp(x, b) else pdp(x, a0).map(a => SEQ(a, b)) |
|
747 } |
|
748 def pdps(s: List[Char], ts: Set[Rexp]): Set[Rexp] = s match { |
|
749 case x::xs => pdps(xs, ts.foldLeft(Set[Rexp]())((acc, t) => acc ++ pder(x, t))) |
|
750 case Nil => ts |
|
751 } |
|
752 def pdpss(ss: List[List[Char]], t: Rexp): Set[Rexp] = ss.foldLeft( Set[Rexp]())((acc, s) => pdps(s, Set(t)) ++ acc) |
|
753 |
|
754 |
611 |
755 |
612 // @arg(doc = "small tests") |
756 // @arg(doc = "small tests") |
613 val STARREG = ((STAR("a") | STAR("aa") ).%).% |
757 val STARREG = (((STAR("a") | (STAR("aa")) | STAR(STAR("aaa") | STAR("aaaa")) | STAR("aaaaa") | (STAR("aaaaaa")) | STAR("aaaaaaa") | STAR("aaaaaaaa")).%).%).% |
|
758 //(STAR("a") | ( STAR("aaa")) | STAR("aaaa") | STAR("aaaaa")).%.%.% |
614 |
759 |
615 @main |
760 @main |
616 def small() = { |
761 def small() = { |
617 |
762 |
618 val prog0 = """aaaaaaaaa""" |
763 |
619 println(s"test: $prog0") |
|
620 // println(lexing_simp(NOTREG, prog0)) |
764 // println(lexing_simp(NOTREG, prog0)) |
621 // val v = lex_simp(NOTREG, prog0.toList) |
765 // val v = lex_simp(NOTREG, prog0.toList) |
622 // println(v) |
766 // println(v) |
623 |
767 |
624 // val d = (lex_simp(NOTREG, prog0.toList)) |
768 // val d = (lex_simp(NOTREG, prog0.toList)) |
625 // println(d) |
769 // println(d) |
626 |
770 val pderSTAR = pderUNIV(STARREG) |
627 val bd = bdersSimp(prog0, STARREG) |
771 val refSize = pderSTAR.map(size(_)).sum |
628 println(erase(bd)) |
772 println(refSize) |
629 println(asize(bd)) |
773 for(i <- 10 to 100){ |
|
774 val prog0 = "a" * i |
|
775 println(s"test: $prog0") |
|
776 val bd = bdersSimp(prog0, STARREG)//DB |
|
777 val sbd = bdersSimpS(prog0, STARREG)//strongDB |
|
778 |
|
779 |
|
780 // println(shortRexpOutput(erase(sbd))) |
|
781 // println(shortRexpOutput(erase(bd))) |
|
782 println("pdersize, original, strongSimp, simp") |
|
783 println(refSize, size(STARREG), asize(sbd), asize(bd)) |
630 |
784 |
631 val vres = blexing_simp( STARREG, prog0) |
785 val vres = blexing_simp( STARREG, prog0) |
632 println(vres) |
786 println(vres) |
|
787 } |
633 // println(vs.length) |
788 // println(vs.length) |
634 // println(vs) |
789 // println(vs) |
635 |
790 |
636 |
791 |
637 // val prog1 = """read n; write n""" |
792 // val prog1 = """read n; write n""" |