Brexp.scala
changeset 151 73f990bc6843
parent 150 b51d34113d47
equal deleted inserted replaced
150:b51d34113d47 151:73f990bc6843
     1 import RexpRelated._
       
     2 import RexpRelated.Rexp._
       
     3 import Spiral._
       
     4 import scala.collection.mutable.ArrayBuffer
       
     5 abstract class BRexp
       
     6 case object BZERO extends BRexp
       
     7 case object BONE extends BRexp
       
     8 case class BCHAR(c: Char) extends BRexp
       
     9 case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp 
       
    10 case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp 
       
    11 case class BSTAR(r: BRexp) extends BRexp 
       
    12 
       
    13 object BRexp{
       
    14   def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there 
       
    15   //are no enclosing STAR or SEQ
       
    16     case ZERO => BZERO
       
    17     case ONE => BONE
       
    18     case CHAR(c) => BCHAR(c)
       
    19     case ALTS(rs) => BALTS(Z,  rs.map(brternalise))
       
    20     case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) )
       
    21     case STAR(r) => BSTAR(brternalise(r))
       
    22     case RECD(x, r) => brternalise(r)
       
    23   }
       
    24   def brnullable (r: BRexp) : Boolean = r match {
       
    25     case BZERO => false
       
    26     case BONE => true
       
    27     case BCHAR(_) => false
       
    28     case BALTS(bs, rs) => rs.exists(brnullable)
       
    29     case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2)
       
    30     case BSTAR(_) => true
       
    31   }
       
    32   //this function tells bspill when converting a brexp into a list of rexps
       
    33   //the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq)
       
    34   //or is from the original regex but have been "touched" (i.e. have been derived)
       
    35   //Why make this distinction? Look at the following example:
       
    36   //r = (c+cb)+c(a+b)
       
    37   // r\c = (1+b)+(a+b)
       
    38   //after simplification
       
    39   // (r\c)simp= 1+b+a
       
    40   //we lost the structure that tells us 1+b should be grouped together and a grouped as itself
       
    41   //however in our brexp simplification, 
       
    42   //(r\c)br_simp = 1+b+(a+b)
       
    43   //we do not allow the bracket to be opened as it is from the original expression and have not been touched
       
    44   def brder(c: Char, r: BRexp) : BRexp = r match {
       
    45     case BZERO => BZERO
       
    46     case BONE => BZERO
       
    47     case BCHAR(d) => if (c == d) BONE else BZERO
       
    48     case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD
       
    49     case BSEQ(r1, r2) => 
       
    50       if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit
       
    51       else BSEQ(brder(c, r1), r2)
       
    52     case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r))
       
    53   }
       
    54   def bflat(rs: List[BRexp]): List[BRexp] = {
       
    55     rs match {
       
    56       case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
       
    57       case BZERO :: rs1 => bflat(rs1)
       
    58       case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2)
       
    59       case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2)
       
    60       case r1 :: rs2 => r1 :: bflat(rs2)
       
    61     }
       
    62   }
       
    63   def stflat(rs: List[BRexp]): List[BRexp] = {
       
    64     //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
       
    65     //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later.
       
    66     rs match {
       
    67       case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
       
    68       case BZERO :: rs1 => bflat(rs1)
       
    69       case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2)
       
    70       case r1 :: rs2 => r1 :: bflat(rs2)
       
    71     }
       
    72   }
       
    73   def berase(r:BRexp): Rexp = r match{
       
    74     case BZERO => ZERO
       
    75     case BONE => ONE
       
    76     case BCHAR(f) => CHAR(f)
       
    77     case BALTS(bs, rs) => ALTS(rs.map(berase(_)))
       
    78     case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2))
       
    79     case BSTAR(r)=> STAR(berase(r))
       
    80   }
       
    81   def br_simp(r: BRexp): BRexp = r match {
       
    82     case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match {
       
    83         case (BZERO, _) => BZERO
       
    84         case (_, BZERO) => BZERO
       
    85         case (BONE, r2s) => r2s 
       
    86         case (r1s, r2s) => BSEQ(r1s, r2s)
       
    87     }
       
    88     case BALTS(bs, rs) => {
       
    89       //assert(bs.length == rs.length - 1)
       
    90       val dist_res = if(bs == S) {//all S
       
    91         val rs_simp = rs.map(br_simp)
       
    92         val flat_res = bflat(rs_simp)
       
    93         distinctBy(flat_res, berase)
       
    94       }
       
    95       else{//not allowed to simplify (all Z)
       
    96         rs
       
    97       }
       
    98       dist_res match {
       
    99         case Nil => {BZERO}
       
   100         case s :: Nil => { s}
       
   101         case rs => {BALTS(bs, rs)}
       
   102       }
       
   103     }
       
   104     //case BSTAR(r) => BSTAR(r)
       
   105     case r => r
       
   106   }
       
   107 
       
   108   def strong_br_simp(r: BRexp): BRexp = r match {
       
   109     case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match {
       
   110         case (BZERO, _) => BZERO
       
   111         case (_, BZERO) => BZERO
       
   112         case (BONE, r2s) => r2s 
       
   113         case (r1s, r2s) => BSEQ(r1s, r2s)
       
   114     }
       
   115     case BALTS(bs, rs) => {
       
   116       //assert(bs.length == rs.length - 1)
       
   117       val dist_res = {//all S
       
   118         val rs_simp = rs.map(strong_br_simp)
       
   119         val flat_res = stflat(rs_simp)
       
   120         distinctBy(flat_res, berase)
       
   121       }
       
   122       dist_res match {
       
   123         case Nil => {BZERO}
       
   124         case s :: Nil => { s}
       
   125         case rs => {BALTS(bs, rs)}
       
   126       }
       
   127     }
       
   128     case BSTAR(r) => BSTAR(strong_br_simp(r))
       
   129     case r => r
       
   130   }
       
   131   //we want to bound the size by a function bspill s.t.
       
   132   //bspill(ders_simp(r,s)) ⊂  PD(r) 
       
   133   //and bspill is size-preserving (up to a constant factor)
       
   134   //so we bound |ders_simp(r,s)| by |PD(r)|
       
   135   //where we already have a nice bound for |PD(r)|: t^2*n^2 in Antimirov's paper
       
   136 
       
   137   //the function bspill converts a brexp into a list of rexps
       
   138   //the conversion mainly about this: (r1+r2)*r3*r4*.....rn -> {r1*r3*r4*....rn, r2*r3*r4*.....rn} 
       
   139   //but this is not always allowed
       
   140   //sometimes, we just leave the expression as it is: 
       
   141   //eg1: (a+b)*c -> {(a+b)*c}
       
   142   //eg2: r1+r2 -> {r1+r2} instead of {r1, r2}
       
   143   //why?
       
   144   //if we always return {a, b} when we encounter a+b, the property
       
   145   //bspill(ders_simp(r,s)) ⊂  PD(r) 
       
   146   //does not always hold
       
   147   //for instance 
       
   148   //if we call the bspill that always returns {r1, r2} when encountering r1+r2 "bspilli"
       
   149   //then bspilli( ders_simp( (c+cb)+c(a+b), c ) ) == bspilli(1+b+a) = {1, b, a}
       
   150   //However the letter a does not belong to PD( (c+cb)+c(a+b) )
       
   151   //then we can no longer say ders_simp(r,s)'s size is bounded by PD(r) because the former contains something the latter does not have
       
   152   //In order to make sure the inclusion holds, we have to find out why new terms appeared in the bspilli set that don't exist in the PD(r) set
       
   153   //Why a does not belong to PD((c+cb)+c(a+b))?
       
   154   //PD(r1+r2) = PD(r1) union PD(r2) => PD((c+cb)+c(a+b)) = PD(c+cb) union PD(c(a+b))
       
   155   //So the only possibility for PD to include a must be in the latter part of the regex, namely, c(a+b)
       
   156   //we have lemma that PD(r) = union of pders(s, r) where s is taken over all strings whose length does not exceed depth(r)
       
   157   //so PD(r) ⊂ pder(empty_string, r) union pder(c, r) union pder(ca, r) union pder(cb, r) where r = c(a+b)
       
   158   //RHS = {1} union pder(c, c(a+b))
       
   159   //Observe that pder(c, c(a+b)) == {a+b}
       
   160   //a and b are together, from the original regular expression (c+cb)+c(a+b).
       
   161   //but by our simplification, we first flattened this a+b into the same level with 1+b, then
       
   162   //removed duplicates of b, thereby destroying the structure in a+b and making this term a, instead of a+b
       
   163   //But in PD(r) we only have a+b, no a
       
   164   //one ad hoc solution might be to try this bspill(ders_simp(r,s)) ⊂  PD(r) union {all letters}
       
   165   //But this does not hold either according to experiment.
       
   166   //So we need to make sure the structure r1+r2 is not simplified away if it is from the original expression
       
   167   
       
   168 
       
   169   
       
   170   def bspill(r: BRexp): List[Rexp] = {
       
   171       r match {
       
   172           case BALTS(b, rs) => {
       
   173             if(b == S)
       
   174               rs.flatMap(bspill)
       
   175             else
       
   176               List(ALTS(rs.map(berase)))
       
   177           }
       
   178           case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) )
       
   179           case BZERO => List()
       
   180           case r => List(berase(r))
       
   181       }
       
   182     
       
   183   }
       
   184 
       
   185 }