Brexp.scala
author Chengsong
Wed, 13 Mar 2019 13:33:54 +0000
changeset 1 99f4459d9bb6
parent 0 902326e1615a
child 2 cf169411b771
permissions -rwxr-xr-x
i
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
Chengsong
parents:
diff changeset
     1
import RexpRelated._
Chengsong
parents:
diff changeset
     2
import RexpRelated.Rexp._
Chengsong
parents:
diff changeset
     3
import Spiral._
Chengsong
parents:
diff changeset
     4
import scala.collection.mutable.ArrayBuffer
Chengsong
parents:
diff changeset
     5
abstract class BRexp
Chengsong
parents:
diff changeset
     6
case object BZERO extends BRexp
Chengsong
parents:
diff changeset
     7
case object BONE extends BRexp
Chengsong
parents:
diff changeset
     8
case class BCHAR(c: Char) extends BRexp
Chengsong
parents:
diff changeset
     9
case class BALTS(b: Bit, rs: List[BRexp]) extends BRexp 
Chengsong
parents:
diff changeset
    10
case class BSEQ(r1: BRexp, r2: BRexp) extends BRexp 
Chengsong
parents:
diff changeset
    11
case class BSTAR(r: BRexp) extends BRexp 
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
object BRexp{
Chengsong
parents:
diff changeset
    14
  def brternalise(r: Rexp) : BRexp = r match {//remember the initial shadowed bit should be set to Z as there 
Chengsong
parents:
diff changeset
    15
  //are no enclosing STAR or SEQ
Chengsong
parents:
diff changeset
    16
    case ZERO => BZERO
Chengsong
parents:
diff changeset
    17
    case ONE => BONE
Chengsong
parents:
diff changeset
    18
    case CHAR(c) => BCHAR(c)
Chengsong
parents:
diff changeset
    19
    case ALTS(rs) => BALTS(Z,  rs.map(brternalise))
Chengsong
parents:
diff changeset
    20
    case SEQ(r1, r2) => BSEQ( brternalise(r1), brternalise(r2) )
Chengsong
parents:
diff changeset
    21
    case STAR(r) => BSTAR(brternalise(r))
Chengsong
parents:
diff changeset
    22
    case RECD(x, r) => brternalise(r)
Chengsong
parents:
diff changeset
    23
  }
Chengsong
parents:
diff changeset
    24
  def brnullable (r: BRexp) : Boolean = r match {
Chengsong
parents:
diff changeset
    25
    case BZERO => false
Chengsong
parents:
diff changeset
    26
    case BONE => true
Chengsong
parents:
diff changeset
    27
    case BCHAR(_) => false
Chengsong
parents:
diff changeset
    28
    case BALTS(bs, rs) => rs.exists(brnullable)
Chengsong
parents:
diff changeset
    29
    case BSEQ(r1, r2) => brnullable(r1) && brnullable(r2)
Chengsong
parents:
diff changeset
    30
    case BSTAR(_) => true
Chengsong
parents:
diff changeset
    31
  }
1
Chengsong
parents: 0
diff changeset
    32
  //this function tells bspill when converting a brexp into a list of rexps
Chengsong
parents: 0
diff changeset
    33
  //the conversion : (a+b)*c -> {a*c, b*c} can only happen when a+b is generated from scratch (e.g. when deriving a seq)
Chengsong
parents: 0
diff changeset
    34
  //or is from the original regex but have been "touched" (i.e. have been derived)
Chengsong
parents: 0
diff changeset
    35
  //Why make this distinction? Look at the following example:
Chengsong
parents: 0
diff changeset
    36
  //r = (c+cb)+c(a+b)
Chengsong
parents: 0
diff changeset
    37
  // r\c = (1+b)+(a+b)
Chengsong
parents: 0
diff changeset
    38
  //after simplification
Chengsong
parents: 0
diff changeset
    39
  // (r\c)simp= 1+b+a
Chengsong
parents: 0
diff changeset
    40
  //we lost the structure that tells us 1+b should be grouped together and a grouped as itself
Chengsong
parents: 0
diff changeset
    41
  //however in our brexp simplification, 
Chengsong
parents: 0
diff changeset
    42
  //(r\c)br_simp = 1+b+(a+b)
Chengsong
parents: 0
diff changeset
    43
  //we do not allow the bracket to be opened as it is from the original expression and have not been touched
0
Chengsong
parents:
diff changeset
    44
  def brder(c: Char, r: BRexp) : BRexp = r match {
Chengsong
parents:
diff changeset
    45
    case BZERO => BZERO
Chengsong
parents:
diff changeset
    46
    case BONE => BZERO
Chengsong
parents:
diff changeset
    47
    case BCHAR(d) => if (c == d) BONE else BZERO
Chengsong
parents:
diff changeset
    48
    case BALTS(bs, rs) => BALTS(S, rs.map(brder(c, _)))//After derivative: Splittable in the sense of PD
Chengsong
parents:
diff changeset
    49
    case BSEQ(r1, r2) => 
Chengsong
parents:
diff changeset
    50
      if (brnullable(r1)) BALTS(S, List(BSEQ(brder(c, r1), r2), brder(c, r2) ) )//in r1\c~r2 r2's structure is maintained together with its splittablility bit
Chengsong
parents:
diff changeset
    51
      else BSEQ(brder(c, r1), r2)
Chengsong
parents:
diff changeset
    52
    case BSTAR(r) => BSEQ(brder(c, r), BSTAR(r))
Chengsong
parents:
diff changeset
    53
  }
Chengsong
parents:
diff changeset
    54
  def bflat(rs: List[BRexp]): List[BRexp] = {
Chengsong
parents:
diff changeset
    55
    //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
Chengsong
parents:
diff changeset
    56
    //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later.
Chengsong
parents:
diff changeset
    57
    rs match {
Chengsong
parents:
diff changeset
    58
      case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
Chengsong
parents:
diff changeset
    59
      case BZERO :: rs1 => bflat(rs1)
Chengsong
parents:
diff changeset
    60
      case BALTS(S, rs1) :: rs2 => rs1 ::: bflat(rs2)
Chengsong
parents:
diff changeset
    61
      case BALTS(Z, rs1) :: rs2 => BALTS(Z, rs1) :: bflat(rs2)
Chengsong
parents:
diff changeset
    62
      case r1 :: rs2 => r1 :: bflat(rs2)
Chengsong
parents:
diff changeset
    63
    }
Chengsong
parents:
diff changeset
    64
  }
Chengsong
parents:
diff changeset
    65
  def stflat(rs: List[BRexp]): List[BRexp] = {
Chengsong
parents:
diff changeset
    66
    //println("bs: " + bs + "rs: "+ rs + "length of bs, rs" + bs.length + ", " + rs.length)
Chengsong
parents:
diff changeset
    67
    //assert(bs.length == rs.length - 1 || bs.length == rs.length)//bs == Nil, rs == Nil  May add early termination later.
Chengsong
parents:
diff changeset
    68
    rs match {
Chengsong
parents:
diff changeset
    69
      case Nil => Nil//TODO: Look at this! bs should have been exhausted one bit earlier.
Chengsong
parents:
diff changeset
    70
      case BZERO :: rs1 => bflat(rs1)
Chengsong
parents:
diff changeset
    71
      case BALTS(_, rs1) :: rs2 => rs1 ::: bflat(rs2)
Chengsong
parents:
diff changeset
    72
      case r1 :: rs2 => r1 :: bflat(rs2)
Chengsong
parents:
diff changeset
    73
    }
Chengsong
parents:
diff changeset
    74
  }
Chengsong
parents:
diff changeset
    75
  def berase(r:BRexp): Rexp = r match{
Chengsong
parents:
diff changeset
    76
    case BZERO => ZERO
Chengsong
parents:
diff changeset
    77
    case BONE => ONE
Chengsong
parents:
diff changeset
    78
    case BCHAR(f) => CHAR(f)
Chengsong
parents:
diff changeset
    79
    case BALTS(bs, rs) => ALTS(rs.map(berase(_)))
Chengsong
parents:
diff changeset
    80
    case BSEQ(r1, r2) => SEQ (berase(r1), berase(r2))
Chengsong
parents:
diff changeset
    81
    case BSTAR(r)=> STAR(berase(r))
Chengsong
parents:
diff changeset
    82
  }
Chengsong
parents:
diff changeset
    83
  def br_simp(r: BRexp): BRexp = r match {
Chengsong
parents:
diff changeset
    84
    case BSEQ(r1, r2) => (br_simp(r1), br_simp(r2)) match {
Chengsong
parents:
diff changeset
    85
        case (BZERO, _) => BZERO
Chengsong
parents:
diff changeset
    86
        case (_, BZERO) => BZERO
Chengsong
parents:
diff changeset
    87
        case (BONE, r2s) => r2s 
Chengsong
parents:
diff changeset
    88
        case (r1s, r2s) => BSEQ(r1s, r2s)
Chengsong
parents:
diff changeset
    89
    }
Chengsong
parents:
diff changeset
    90
    case BALTS(bs, rs) => {
Chengsong
parents:
diff changeset
    91
      //assert(bs.length == rs.length - 1)
Chengsong
parents:
diff changeset
    92
      val dist_res = if(bs == S) {//all S
Chengsong
parents:
diff changeset
    93
        val rs_simp = rs.map(br_simp)
Chengsong
parents:
diff changeset
    94
        val flat_res = bflat(rs_simp)
Chengsong
parents:
diff changeset
    95
        distinctBy(flat_res, berase)
Chengsong
parents:
diff changeset
    96
      }
Chengsong
parents:
diff changeset
    97
      else{//not allowed to simplify (all Z)
Chengsong
parents:
diff changeset
    98
        rs
Chengsong
parents:
diff changeset
    99
      }
Chengsong
parents:
diff changeset
   100
      dist_res match {
Chengsong
parents:
diff changeset
   101
        case Nil => {BZERO}
Chengsong
parents:
diff changeset
   102
        case s :: Nil => { s}
Chengsong
parents:
diff changeset
   103
        case rs => {BALTS(bs, rs)}
Chengsong
parents:
diff changeset
   104
      }
Chengsong
parents:
diff changeset
   105
    }
Chengsong
parents:
diff changeset
   106
    //case BSTAR(r) => BSTAR(r)
Chengsong
parents:
diff changeset
   107
    case r => r
Chengsong
parents:
diff changeset
   108
  }
Chengsong
parents:
diff changeset
   109
Chengsong
parents:
diff changeset
   110
  def strong_br_simp(r: BRexp): BRexp = r match {
Chengsong
parents:
diff changeset
   111
    case BSEQ(r1, r2) => (strong_br_simp(r1), strong_br_simp(r2)) match {
Chengsong
parents:
diff changeset
   112
        case (BZERO, _) => BZERO
Chengsong
parents:
diff changeset
   113
        case (_, BZERO) => BZERO
Chengsong
parents:
diff changeset
   114
        case (BONE, r2s) => r2s 
Chengsong
parents:
diff changeset
   115
        case (r1s, r2s) => BSEQ(r1s, r2s)
Chengsong
parents:
diff changeset
   116
    }
Chengsong
parents:
diff changeset
   117
    case BALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   118
      //assert(bs.length == rs.length - 1)
Chengsong
parents:
diff changeset
   119
      val dist_res = {//all S
Chengsong
parents:
diff changeset
   120
        val rs_simp = rs.map(strong_br_simp)
Chengsong
parents:
diff changeset
   121
        val flat_res = stflat(rs_simp)
Chengsong
parents:
diff changeset
   122
        distinctBy(flat_res, berase)
Chengsong
parents:
diff changeset
   123
      }
Chengsong
parents:
diff changeset
   124
      dist_res match {
Chengsong
parents:
diff changeset
   125
        case Nil => {BZERO}
Chengsong
parents:
diff changeset
   126
        case s :: Nil => { s}
Chengsong
parents:
diff changeset
   127
        case rs => {BALTS(bs, rs)}
Chengsong
parents:
diff changeset
   128
      }
Chengsong
parents:
diff changeset
   129
    }
Chengsong
parents:
diff changeset
   130
    //case BSTAR(r) => BSTAR(r)
Chengsong
parents:
diff changeset
   131
    case r => r
Chengsong
parents:
diff changeset
   132
  }
Chengsong
parents:
diff changeset
   133
Chengsong
parents:
diff changeset
   134
  def break_chain(bs: Bit, rs: List[BRexp]): List[Rexp] = {
Chengsong
parents:
diff changeset
   135
    //do it in a functional style
Chengsong
parents:
diff changeset
   136
    if(bs == S)
Chengsong
parents:
diff changeset
   137
      rs.flatMap(bspill)
Chengsong
parents:
diff changeset
   138
    else
Chengsong
parents:
diff changeset
   139
      List(ALTS(rs.map(berase)))
Chengsong
parents:
diff changeset
   140
  }
Chengsong
parents:
diff changeset
   141
  
1
Chengsong
parents: 0
diff changeset
   142
  //this function converts a brexp into a list of rexps
Chengsong
parents: 0
diff changeset
   143
  //the conversion mainly about this: (a+b)*c -> {a*c, b*c} if a+b is generated from scratch (e.g. when deriving a seq)
Chengsong
parents: 0
diff changeset
   144
  //or is from the original regex but have been "touched" (i.e. have been derived)
Chengsong
parents: 0
diff changeset
   145
  //Why make this distinction? Look at the following example:
Chengsong
parents: 0
diff changeset
   146
  //r = (c+cb)+c(a+b)
Chengsong
parents: 0
diff changeset
   147
  // r\c = (1+b)+(a+b)
Chengsong
parents: 0
diff changeset
   148
  //after simplification
Chengsong
parents: 0
diff changeset
   149
  // (r\c)simp= 1+b+a
Chengsong
parents: 0
diff changeset
   150
  //we lost the structure that tells us 1+b should be grouped together and a grouped as itself
Chengsong
parents: 0
diff changeset
   151
  //however in our brexp simplification, 
Chengsong
parents: 0
diff changeset
   152
  //(r\c)br_simp = 1+b+(a+b)
Chengsong
parents: 0
diff changeset
   153
  //we do not allow the bracket to be opened as it is from the original expression and have not been touched
Chengsong
parents: 0
diff changeset
   154
  def bspill(r: BRexp): List[Rexp] = {
Chengsong
parents: 0
diff changeset
   155
      r match {
0
Chengsong
parents:
diff changeset
   156
          case BALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   157
            break_chain(bs, rs)
Chengsong
parents:
diff changeset
   158
          }
Chengsong
parents:
diff changeset
   159
          case BSEQ(r2, r3) => bspill(r2).map( a => if(a == ONE) berase(r3) else SEQ(a, berase(r3)) )
Chengsong
parents:
diff changeset
   160
          case BZERO => List()
Chengsong
parents:
diff changeset
   161
          case r => List(berase(r))
Chengsong
parents:
diff changeset
   162
      }
Chengsong
parents:
diff changeset
   163
    
Chengsong
parents:
diff changeset
   164
  }
Chengsong
parents:
diff changeset
   165
Chengsong
parents:
diff changeset
   166
}