|      1 import RexpRelated._ |         | 
|      2 import RexpRelated.Rexp._ |         | 
|      3 import Spiral._ |         | 
|      4 import scala.collection.mutable.ArrayBuffer |         | 
|      5 abstract class BRexp |         | 
|      6 case object BZERO extends BRexp |         | 
|      7 case object BONE extends BRexp |         | 
|      8 case class BCHAR(c: Char) extends BRexp |         | 
|      9 case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp  |         | 
|     10 case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp  |         | 
|     11 case class BSTAR(r: BRexp) extends BRexp  |         | 
|     12  |         | 
|     13 object BRexp{ |         | 
|     14   def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there  |         | 
|     15   //are no enclosing STAR or SEQ |         | 
|     16     case ZERO => BZERO |         | 
|     17     case ONE => BONE |         | 
|     18     case CHAR(c) => BCHAR(c) |         | 
|     19     case ALTS(rs) => BALTS(Z,  rs.map(brternalise)) |         | 
|     20     case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) ) |         | 
|     21     case STAR(r) => BSTAR(brternalise(r)) |         | 
|     22     case RECD(x, r) => brternalise(r) |         | 
|     23   } |         | 
|     24   def brnullable (r: BRexp) : Boolean = r match { |         | 
|     25     case BZERO => false |         | 
|     26     case BONE => true |         | 
|     27     case BCHAR(_) => false |         | 
|     28     case BALTS(bs, rs) => rs.exists(brnullable) |         | 
|     29     case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2) |         | 
|     30     case BSTAR(_) => true |         | 
|     31   } |         | 
|     32   //this function tells bspill when converting a brexp into a list of rexps |         | 
|     33   //the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq) |         | 
|     34   //or is from the original regex but have been "touched" (i.e. have been derived) |         | 
|     35   //Why make this distinction? Look at the following example: |         | 
|     36   //r = (c+cb)+c(a+b) |         | 
|     37   // r\c = (1+b)+(a+b) |         | 
|     38   //after simplification |         | 
|     39   // (r\c)simp= 1+b+a |         | 
|     40   //we lost the structure that tells us 1+b should be grouped together and a grouped as itself |         | 
|     41   //however in our brexp simplification,  |         | 
|     42   //(r\c)br_simp = 1+b+(a+b) |         | 
|     43   //we do not allow the bracket to be opened as it is from the original expression and have not been touched |         | 
|     44   def brder(c: Char, r: BRexp) : BRexp = r match { |         | 
|     45     case BZERO => BZERO |         | 
|     46     case BONE => BZERO |         | 
|     47     case BCHAR(d) => if (c == d) BONE else BZERO |         | 
|     48     case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD |         | 
|     49     case BSEQ(r1, r2) =>  |         | 
|     50       if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit |         | 
|     51       else BSEQ(brder(c, r1), r2) |         | 
|     52     case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r)) |         | 
|     53   } |         | 
|     54   def bflat(rs: List[BRexp]): List[BRexp] = { |         | 
|     55     rs match { |         | 
|     56       case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier. |         | 
|     57       case BZERO :: rs1 => bflat(rs1) |         | 
|     58       case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2) |         | 
|     59       case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2) |         | 
|     60       case r1 :: rs2 => r1 :: bflat(rs2) |         | 
|     61     } |         | 
|     62   } |         | 
|     63   def stflat(rs: List[BRexp]): List[BRexp] = { |         | 
|     64     //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length) |         | 
|     65     //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later. |         | 
|     66     rs match { |         | 
|     67       case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier. |         | 
|     68       case BZERO :: rs1 => bflat(rs1) |         | 
|     69       case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2) |         | 
|     70       case r1 :: rs2 => r1 :: bflat(rs2) |         | 
|     71     } |         | 
|     72   } |         | 
|     73   def berase(r:BRexp): Rexp = r match{ |         | 
|     74     case BZERO => ZERO |         | 
|     75     case BONE => ONE |         | 
|     76     case BCHAR(f) => CHAR(f) |         | 
|     77     case BALTS(bs, rs) => ALTS(rs.map(berase(_))) |         | 
|     78     case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2)) |         | 
|     79     case BSTAR(r)=> STAR(berase(r)) |         | 
|     80   } |         | 
|     81   def br_simp(r: BRexp): BRexp = r match { |         | 
|     82     case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match { |         | 
|     83         case (BZERO, _) => BZERO |         | 
|     84         case (_, BZERO) => BZERO |         | 
|     85         case (BONE, r2s) => r2s  |         | 
|     86         case (r1s, r2s) => BSEQ(r1s, r2s) |         | 
|     87     } |         | 
|     88     case BALTS(bs, rs) => { |         | 
|     89       //assert(bs.length == rs.length - 1) |         | 
|     90       val dist_res = if(bs == S) {//all S |         | 
|     91         val rs_simp = rs.map(br_simp) |         | 
|     92         val flat_res = bflat(rs_simp) |         | 
|     93         distinctBy(flat_res, berase) |         | 
|     94       } |         | 
|     95       else{//not allowed to simplify (all Z) |         | 
|     96         rs |         | 
|     97       } |         | 
|     98       dist_res match { |         | 
|     99         case Nil => {BZERO} |         | 
|    100         case s :: Nil => { s} |         | 
|    101         case rs => {BALTS(bs, rs)} |         | 
|    102       } |         | 
|    103     } |         | 
|    104     //case BSTAR(r) => BSTAR(r) |         | 
|    105     case r => r |         | 
|    106   } |         | 
|    107  |         | 
|    108   def strong_br_simp(r: BRexp): BRexp = r match { |         | 
|    109     case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match { |         | 
|    110         case (BZERO, _) => BZERO |         | 
|    111         case (_, BZERO) => BZERO |         | 
|    112         case (BONE, r2s) => r2s  |         | 
|    113         case (r1s, r2s) => BSEQ(r1s, r2s) |         | 
|    114     } |         | 
|    115     case BALTS(bs, rs) => { |         | 
|    116       //assert(bs.length == rs.length - 1) |         | 
|    117       val dist_res = {//all S |         | 
|    118         val rs_simp = rs.map(strong_br_simp) |         | 
|    119         val flat_res = stflat(rs_simp) |         | 
|    120         distinctBy(flat_res, berase) |         | 
|    121       } |         | 
|    122       dist_res match { |         | 
|    123         case Nil => {BZERO} |         | 
|    124         case s :: Nil => { s} |         | 
|    125         case rs => {BALTS(bs, rs)} |         | 
|    126       } |         | 
|    127     } |         | 
|    128     case BSTAR(r) => BSTAR(strong_br_simp(r)) |         | 
|    129     case r => r |         | 
|    130   } |         | 
|    131   //we want to bound the size by a function bspill s.t. |         | 
|    132   //bspill(ders_simp(r,s)) ⊂  PD(r)  |         | 
|    133   //and bspill is size-preserving (up to a constant factor) |         | 
|    134   //so we bound |ders_simp(r,s)| by |PD(r)| |         | 
|    135   //where we already have a nice bound for |PD(r)|: t^2*n^2 in Antimirov's paper |         | 
|    136  |         | 
|    137   //the function bspill converts a brexp into a list of rexps |         | 
|    138   //the conversion mainly about this: (r1+r2)*r3*r4*.....rn -> {r1*r3*r4*....rn, r2*r3*r4*.....rn}  |         | 
|    139   //but this is not always allowed |         | 
|    140   //sometimes, we just leave the expression as it is:  |         | 
|    141   //eg1: (a+b)*c -> {(a+b)*c} |         | 
|    142   //eg2: r1+r2 -> {r1+r2} instead of {r1, r2} |         | 
|    143   //why? |         | 
|    144   //if we always return {a, b} when we encounter a+b, the property |         | 
|    145   //bspill(ders_simp(r,s)) ⊂  PD(r)  |         | 
|    146   //does not always hold |         | 
|    147   //for instance  |         | 
|    148   //if we call the bspill that always returns {r1, r2} when encountering r1+r2 "bspilli" |         | 
|    149   //then bspilli( ders_simp( (c+cb)+c(a+b), c ) ) == bspilli(1+b+a) = {1, b, a} |         | 
|    150   //However the letter a does not belong to PD( (c+cb)+c(a+b) ) |         | 
|    151   //then we can no longer say ders_simp(r,s)'s size is bounded by PD(r) because the former contains something the latter does not have |         | 
|    152   //In order to make sure the inclusion holds, we have to find out why new terms appeared in the bspilli set that don't exist in the PD(r) set |         | 
|    153   //Why a does not belong to PD((c+cb)+c(a+b))? |         | 
|    154   //PD(r1+r2) = PD(r1) union PD(r2) => PD((c+cb)+c(a+b)) = PD(c+cb) union PD(c(a+b)) |         | 
|    155   //So the only possibility for PD to include a must be in the latter part of the regex, namely, c(a+b) |         | 
|    156   //we have lemma that PD(r) = union of pders(s, r) where s is taken over all strings whose length does not exceed depth(r) |         | 
|    157   //so PD(r) ⊂ pder(empty_string, r) union pder(c, r) union pder(ca, r) union pder(cb, r) where r = c(a+b) |         | 
|    158   //RHS = {1} union pder(c, c(a+b)) |         | 
|    159   //Observe that pder(c, c(a+b)) == {a+b} |         | 
|    160   //a and b are together, from the original regular expression (c+cb)+c(a+b). |         | 
|    161   //but by our simplification, we first flattened this a+b into the same level with 1+b, then |         | 
|    162   //removed duplicates of b, thereby destroying the structure in a+b and making this term a, instead of a+b |         | 
|    163   //But in PD(r) we only have a+b, no a |         | 
|    164   //one ad hoc solution might be to try this bspill(ders_simp(r,s)) ⊂  PD(r) union {all letters} |         | 
|    165   //But this does not hold either according to experiment. |         | 
|    166   //So we need to make sure the structure r1+r2 is not simplified away if it is from the original expression |         | 
|    167    |         | 
|    168  |         | 
|    169    |         | 
|    170   def bspill(r: BRexp): List[Rexp] = { |         | 
|    171       r match { |         | 
|    172           case BALTS(b, rs) => { |         | 
|    173             if(b == S) |         | 
|    174               rs.flatMap(bspill) |         | 
|    175             else |         | 
|    176               List(ALTS(rs.map(berase))) |         | 
|    177           } |         | 
|    178           case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) ) |         | 
|    179           case BZERO => List() |         | 
|    180           case r => List(berase(r)) |         | 
|    181       } |         | 
|    182      |         | 
|    183   } |         | 
|    184  |         | 
|    185 } |         |