progs/scala/re-bit2.scala
changeset 360 e752d84225ec
parent 359 fedc16924b76
child 416 57182b36ec01
equal deleted inserted replaced
359:fedc16924b76 360:e752d84225ec
     1 import scala.language.implicitConversions    
     1 import scala.language.implicitConversions    
     2 import scala.language.reflectiveCalls
     2 import scala.language.reflectiveCalls
     3 import scala.annotation.tailrec   
     3 import scala.annotation.tailrec   
     4 
     4 
     5 
     5 // standard regular expressions
     6 abstract class Rexp 
     6 abstract class Rexp 
     7 case object ZERO extends Rexp
     7 case object ZERO extends Rexp
     8 case object ONE extends Rexp
     8 case object ONE extends Rexp
     9 case class CHAR(c: Char) extends Rexp
     9 case class CHAR(c: Char) extends Rexp
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    10 case class ALT(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    11 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    12 case class STAR(r: Rexp) extends Rexp 
    13 
    13 
    14 
       
    15 
       
    16 abstract class Bit
    14 abstract class Bit
    17 case object Z extends Bit
    15 case object Z extends Bit
    18 case object S extends Bit
    16 case object S extends Bit
    19 
    17 
    20 type Bits = List[Bit]
    18 type Bits = List[Bit]
    21 
    19 
       
    20 // annotated regular expressions
    22 abstract class ARexp 
    21 abstract class ARexp 
    23 case object AZERO extends ARexp
    22 case object AZERO extends ARexp
    24 case class AONE(bs: Bits) extends ARexp
    23 case class AONE(bs: Bits) extends ARexp
    25 case class ACHAR(bs: Bits, c: Char) extends ARexp
    24 case class ACHAR(bs: Bits, c: Char) extends ARexp
    26 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    25 case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
    27 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    26 case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
    28 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    27 case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
    29 
    28 
       
    29 // an abbreviation for binary alternatives
    30 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
    30 def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
    31 
    31 
    32 abstract class Val
    32 abstract class Val
    33 case object Empty extends Val
    33 case object Empty extends Val
    34 case class Chr(c: Char) extends Val
    34 case class Chr(c: Char) extends Val
    35 case class Sequ(v1: Val, v2: Val) extends Val
    35 case class Sequ(v1: Val, v2: Val) extends Val
    36 case class Left(v: Val) extends Val
    36 case class Left(v: Val) extends Val
    37 case class Right(v: Val) extends Val
    37 case class Right(v: Val) extends Val
    38 case class Stars(vs: List[Val]) extends Val
    38 case class Stars(vs: List[Val]) extends Val
    39 case class Position(i: Int, v: Val) extends Val  // for testing purposes
    39 
    40 case object Undefined extends Val                // for testing purposes
       
    41    
    40    
    42 // some convenience for typing in regular expressions
    41 // some convenience for typing in regular expressions
    43 def charlist2rexp(s: List[Char]): Rexp = s match {
    42 def charlist2rexp(s: List[Char]): Rexp = s match {
    44   case Nil => ONE
    43   case Nil => ONE
    45   case c::Nil => CHAR(c)
    44   case c::Nil => CHAR(c)
    60   def ~ (r: Rexp) = SEQ(s, r)
    59   def ~ (r: Rexp) = SEQ(s, r)
    61   def ~ (r: String) = SEQ(s, r)
    60   def ~ (r: String) = SEQ(s, r)
    62 }
    61 }
    63 
    62 
    64 
    63 
    65 // nullable function: tests whether the regular 
       
    66 // expression can recognise the empty string
       
    67 def nullable(r: Rexp) : Boolean = r match {
       
    68   case ZERO => false
       
    69   case ONE => true
       
    70   case CHAR(_) => false
       
    71   case ALT(r1, r2) => nullable(r1) || nullable(r2)
       
    72   case SEQ(r1, r2) => nullable(r1) && nullable(r2)
       
    73   case STAR(_) => true
       
    74 }
       
    75 
       
    76 // derivative of a regular expression w.r.t. a character
       
    77 def der(c: Char, r: Rexp) : Rexp = r match {
       
    78   case ZERO => ZERO
       
    79   case ONE => ZERO
       
    80   case CHAR(d) => if (c == d) ONE else ZERO
       
    81   case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
       
    82   case SEQ(r1, r2) => 
       
    83     if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
       
    84     else SEQ(der(c, r1), r2)
       
    85   case STAR(r) => SEQ(der(c, r), STAR(r))
       
    86 }
       
    87 
       
    88 // derivative w.r.t. a string (iterates der)
       
    89 def ders(s: List[Char], r: Rexp) : Rexp = s match {
       
    90   case Nil => r
       
    91   case c::s => ders(s, der(c, r))
       
    92 }
       
    93 
       
    94 // mkeps and injection part
       
    95 def mkeps(r: Rexp) : Val = r match {
       
    96   case ONE => Empty
       
    97   case ALT(r1, r2) => 
       
    98     if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
       
    99   case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
       
   100   case STAR(r) => Stars(Nil)
       
   101 }
       
   102 
       
   103 
       
   104 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
       
   105   case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
       
   106   case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
       
   107   case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
       
   108   case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
       
   109   case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1))
       
   110   case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2))
       
   111   case (CHAR(d), Empty) => Chr(c) 
       
   112 }
       
   113 
       
   114 // main lexing function (produces a value)
       
   115 // - no simplification
       
   116 def lex(r: Rexp, s: List[Char]) : Val = s match {
       
   117   case Nil => if (nullable(r)) mkeps(r) 
       
   118               else throw new Exception("Not matched")
       
   119   case c::cs => inj(r, c, lex(der(c, r), cs))
       
   120 }
       
   121 
       
   122 def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
       
   123 
       
   124 
       
   125 
       
   126 // Bitcoded + Annotation
    64 // Bitcoded + Annotation
   127 //=======================
    65 //=======================
   128 
    66 
   129 //erase function: extracts the regx from Aregex
    67 //erase function: extracts the Rexp from ARexp
   130 def erase(r:ARexp): Rexp = r match{
    68 def erase(r:ARexp): Rexp = r match{
   131   case AZERO => ZERO
    69   case AZERO => ZERO
   132   case AONE(_) => ONE
    70   case AONE(_) => ONE
   133   case ACHAR(bs, c) => CHAR(c)
    71   case ACHAR(bs, c) => CHAR(c)
   134   case AALTS(bs, Nil) => ZERO
    72   case AALTS(bs, Nil) => ZERO
   136   case AALTS(bs, r::rs) => ALT(erase(r), erase(AALTS(bs, rs)))
    74   case AALTS(bs, r::rs) => ALT(erase(r), erase(AALTS(bs, rs)))
   137   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
    75   case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
   138   case ASTAR(cs, r)=> STAR(erase(r))
    76   case ASTAR(cs, r)=> STAR(erase(r))
   139 }
    77 }
   140 
    78 
   141 // translation into ARexps
       
   142 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
    79 def fuse(bs: Bits, r: ARexp) : ARexp = r match {
   143   case AZERO => AZERO
    80   case AZERO => AZERO
   144   case AONE(cs) => AONE(bs ++ cs)
    81   case AONE(cs) => AONE(bs ++ cs)
   145   case ACHAR(cs, c) => ACHAR(bs ++ cs, c)
    82   case ACHAR(cs, c) => ACHAR(bs ++ cs, c)
   146   case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
    83   case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
   159 
    96 
   160 
    97 
   161 internalise(("a" | "ab") ~ ("b" | ""))
    98 internalise(("a" | "ab") ~ ("b" | ""))
   162 
    99 
   163 
   100 
   164 
   101 // decoding of a value from a bitsequence
   165 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
   102 def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
   166   case (ONE, bs) => (Empty, bs)
   103   case (ONE, bs) => (Empty, bs)
   167   case (CHAR(c), bs) => (Chr(c), bs)
   104   case (CHAR(c), bs) => (Chr(c), bs)
   168   case (ALT(r1, r2), Z::bs) => {
   105   case (ALT(r1, r2), Z::bs) => {
   169     val (v, bs1) = decode_aux(r1, bs)
   106     val (v, bs1) = decode_aux(r1, bs)
   200   case Stars(Nil) => List(S)
   137   case Stars(Nil) => List(S)
   201   case Stars(v::vs) =>  Z :: encode(v) ::: encode(Stars(vs))
   138   case Stars(v::vs) =>  Z :: encode(v) ::: encode(Stars(vs))
   202 }
   139 }
   203 
   140 
   204 
   141 
   205 // nullable function: tests whether the aregular 
   142 // nullable function: tests whether the an (annotated) 
   206 // expression can recognise the empty string
   143 // regular expression can recognise the empty string
   207 def bnullable (r: ARexp) : Boolean = r match {
   144 def bnullable (r: ARexp) : Boolean = r match {
   208   case AZERO => false
   145   case AZERO => false
   209   case AONE(_) => true
   146   case AONE(_) => true
   210   case ACHAR(_,_) => false
   147   case ACHAR(_,_) => false
   211   case AALTS(_, rs) => rs.exists(bnullable)
   148   case AALTS(_, rs) => rs.exists(bnullable)
   232     if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
   169     if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(bmkeps(r1), bder(c, r2)))
   233     else ASEQ(bs, bder(c, r1), r2)
   170     else ASEQ(bs, bder(c, r1), r2)
   234   case ASTAR(bs, r) => ASEQ(bs, fuse(List(Z), bder(c, r)), ASTAR(Nil, r))
   171   case ASTAR(bs, r) => ASEQ(bs, fuse(List(Z), bder(c, r)), ASTAR(Nil, r))
   235 }
   172 }
   236 
   173 
   237 // derivative w.r.t. a string (iterates der)
   174 // derivative w.r.t. a string (iterates bder)
   238 @tailrec
   175 @tailrec
   239 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   176 def bders (s: List[Char], r: ARexp) : ARexp = s match {
   240   case Nil => r
   177   case Nil => r
   241   case c::s => bders(s, bder(c, r))
   178   case c::s => bders(s, bder(c, r))
   242 }
   179 }
   250 def pre_blexing(r: ARexp, s: String)  : Bits = blex(r, s.toList)
   187 def pre_blexing(r: ARexp, s: String)  : Bits = blex(r, s.toList)
   251 def blexing(r: Rexp, s: String) : Val = decode(r, pre_blexing(internalise(r), s))
   188 def blexing(r: Rexp, s: String) : Val = decode(r, pre_blexing(internalise(r), s))
   252 
   189 
   253 
   190 
   254 // example by Tudor
   191 // example by Tudor
   255 val reg = ((("a".%)) ~ ("b" | "c")).%
   192 //val reg = ((("a".%)) ~ ("b" | "c")).%
   256 
   193 
   257 println(blexing(reg, "aab"))
   194 //println(blexing(reg, "aab"))
   258 
   195 
   259 
   196 
   260 //=======================
   197 //=======================
   261 // simplification 
   198 // simplification 
   262 //
   199 //
   287       case (_, AZERO) => AZERO
   224       case (_, AZERO) => AZERO
   288       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   225       case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
   289       //case (AALTS(bs, rs), r2) => AALTS(bs, rs.map(ASEQ(Nil, _, r2)))
   226       //case (AALTS(bs, rs), r2) => AALTS(bs, rs.map(ASEQ(Nil, _, r2)))
   290       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   227       case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
   291   }
   228   }
   292   case AALTS(bs1, rs) => distinctBy(flts(rs.map(bsimp)), erase) match {
   229   case AALTS(bs1, rs) => (flts(rs.map(bsimp))).distinctBy(erase) match {
   293       case Nil => AZERO
   230       case Nil => AZERO
   294       case r::Nil => fuse(bs1, r)
   231       case r::Nil => fuse(bs1, r)
   295       case rs => AALTS(bs1, rs)
   232       case rs => AALTS(bs1, rs)
   296   }
   233   }
   297   case r => r
   234   case r => r
   304 }
   241 }
   305 
   242 
   306 def blexing_simp(r: Rexp, s: String) : Val = 
   243 def blexing_simp(r: Rexp, s: String) : Val = 
   307   decode(r, blex_simp(internalise(r), s.toList))
   244   decode(r, blex_simp(internalise(r), s.toList))
   308 
   245 
   309 println(blexing_simp(reg, "aab"))
   246 //println(blexing_simp(reg, "aab"))
   310 
   247 
   311 // bsimp2 by Chengsong
       
   312 
       
   313 def pos_i(rs: List[ARexp], v: Val): Int = (rs, v) match {
       
   314     case (r::Nil,        v1) => 0
       
   315     case ( r::rs1, Right(v)) => pos_i(rs1, v) + 1
       
   316     case ( r::rs1, Left(v) ) => 0
       
   317 }
       
   318 
       
   319 def pos_v(rs: List[ARexp], v: Val): Val = (rs, v) match {
       
   320     case (r::Nil,       v1) => v1
       
   321     case (r::rs1, Right(v)) => pos_v(rs1, v)
       
   322     case (r::rs1, Left(v) ) => v
       
   323 }
       
   324 
       
   325 def unify(rs: List[ARexp], v: Val): Val = {
       
   326   Position(pos_i(rs, v), pos_v(rs, v))
       
   327 }
       
   328 
       
   329 // coat does the job of "coating" a value
       
   330 // given the number of right outside it
       
   331 def coat(v: Val, i: Int) : Val = i match {
       
   332   case 0 => v
       
   333   case i => if (i > 0) coat(Right(v), i - 1) else { println(v, i); throw new Exception("coat minus")}
       
   334 }
       
   335 
       
   336 def distinctBy2[B, C](v: Val, xs: List[B], f: B => C, acc: List[C] = Nil, res: List[B] = Nil): (List[B], Val) = xs match {
       
   337     case Nil => (res, v)
       
   338     case (x::xs) => {
       
   339       val re = f(x)
       
   340       if (acc.contains(re)) v match { 
       
   341         case Position(i, v0) => distinctBy2(Position(i - 1, v0), xs, f, acc, res)  
       
   342         case _ => throw new Exception("dB2")
       
   343       }
       
   344       else distinctBy2(v, xs, f, re::acc, x::res)
       
   345     }
       
   346   }
       
   347 
       
   348 
       
   349 def flats(rs: List[ARexp]): List[ARexp] = rs match {
       
   350       case Nil => Nil
       
   351       case AZERO :: rs1 => flats(rs1)
       
   352       case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
       
   353       case r1 :: rs2 => r1 :: flats(rs2)
       
   354   }
       
   355 
       
   356 
       
   357 
       
   358 def flats2(front: List[ARexp], i: Int, rs: List[ARexp], v: Val): (List[ARexp], Val) = v match {
       
   359     case Position(j, v0) => {
       
   360       if (i < 0) (front ::: flats(rs), Position(j, v0) ) 
       
   361       else if(i == 0){
       
   362         rs match {
       
   363           case AALTS(bs, rs1) :: rs2 => ( (front ::: rs1.map(fuse(bs, _))):::flats(rs2), Position(j + rs1.length - 1, pos_v(rs1, v0)))
       
   364           case r::rs2 => (front ::: List(r) ::: flats(rs2), Position(j, v0))
       
   365           case _ => throw new Exception("flats2 i = 0")
       
   366         }
       
   367       }
       
   368       else{
       
   369         rs match {
       
   370           case AZERO::rs1 => flats2(front, i - 1, rs1, Position(j - 1, v0))
       
   371           case AALTS(bs, rs1) ::rs2 => flats2(front:::rs1.map(fuse(bs, _)), i - 1, rs2, Position(j + rs1.length - 1, v0))
       
   372           case r::rs1 => flats2(front:::List(r), i - 1, rs1, Position(j, v0)) 
       
   373           case _ => throw new Exception("flats2 i>0")
       
   374         }
       
   375       }
       
   376     }
       
   377     case _ => throw new Exception("flats2 error")
       
   378   }
       
   379 
       
   380 def deunify(rs_length: Int, v: Val): Val = v match{
       
   381   case Position(i, v0) => {
       
   382       if (i <0 ) { println(rs_length, v); throw new Exception("deunify minus") } 
       
   383       else if (rs_length == 1) {println(v); v}
       
   384       else if (rs_length - 1 == i) coat(v0, i) 
       
   385       else coat(Left(v0), i)
       
   386   }
       
   387   case _ => throw new Exception("deunify error")
       
   388 }
       
   389 
       
   390 
       
   391 def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r, v) match {
       
   392     case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
       
   393         case ((AZERO, _), (_, _)) => (AZERO, Undefined)
       
   394         case ((_, _), (AZERO, _)) => (AZERO, Undefined)
       
   395         case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s)
       
   396                  // v2 tells how to retrieve bits in r2s, which is enough as the bits 
       
   397                  // of the first part of the sequence has already been integrated to 
       
   398                  // the top level of the second regx.
       
   399         case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
       
   400     }
       
   401     case (AALTS(bs1, rs), v) => {
       
   402       val vlist = unify(rs, v)
       
   403       vlist match {
       
   404         case Position(i, v0) => {
       
   405           val v_simp = bsimp2(rs(i), v0)._2
       
   406           val rs_simp = rs.map(bsimp)
       
   407           val flat_res = flats2( Nil, i, rs_simp, Position(i, v_simp) )
       
   408           val dist_res = distinctBy2(flat_res._2, flat_res._1, erase)
       
   409           val rs_new = dist_res._1
       
   410           val v_new = deunify(rs_new.length, dist_res._2)
       
   411           rs_new match {
       
   412             case Nil => (AZERO, Undefined)
       
   413             case s :: Nil => (fuse(bs1, s), v_new)
       
   414             case rs => (AALTS(bs1, rs), v_new)
       
   415           }
       
   416         }
       
   417         case _ => throw new Exception("Funny vlist bsimp2")
       
   418       }
       
   419     }
       
   420     // STAR case
       
   421     // case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
       
   422     case (r, v) => (r, v)  
       
   423   }
       
   424 
       
   425 
       
   426 
       
   427 val dr = ASEQ(List(),AALTS(List(S),List(AONE(List(Z)), AONE(List(S)))),ASTAR(List(),AALTS(List(),List(ACHAR(List(Z),'a'), ACHAR(List(S),'a')))))
       
   428 val dv = Sequ(Left(Empty),Stars(List()))
       
   429 println(bsimp2(dr, dv))
       
   430   
       
   431 
       
   432 /*
       
   433 def blex_simp2(r: ARexp, s: List[Char]) : Bits = s match {
       
   434   case Nil => if (bnullable(r)) bmkeps(r) 
       
   435               else throw new Exception("Not matched")
       
   436   case c::cs => blex(bsimp2(bder(c, r)), cs)
       
   437 }
       
   438 
       
   439 def blexing_simp2(r: Rexp, s: String) : Val = 
       
   440   decode(r, blex_simp2(internalise(r), s.toList))
       
   441 
       
   442 println(blexing_simp2(reg, "aab"))
       
   443 */
       
   444 
   248 
   445 // extracts a string from value
   249 // extracts a string from value
   446 def flatten(v: Val) : String = v match {
   250 def flatten(v: Val) : String = v match {
   447   case Empty => ""
   251   case Empty => ""
   448   case Chr(c) => c.toString
   252   case Chr(c) => c.toString