lex_blex_Frankensteined.scala
author Chengsong
Thu, 23 Jan 2020 22:49:19 +0000
changeset 111 af2c63f9bdf9
parent 109 79f347cb8b4d
child 148 c8ef391dd6f7
permissions -rw-r--r--
refined section a bit
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
Chengsong
parents:
diff changeset
     1
package RexpRelated
Chengsong
parents:
diff changeset
     2
import scala.language.implicitConversions    
Chengsong
parents:
diff changeset
     3
import scala.language.reflectiveCalls
Chengsong
parents:
diff changeset
     4
import scala.annotation.tailrec   
Chengsong
parents:
diff changeset
     5
import scala.util.Try
Chengsong
parents:
diff changeset
     6
Chengsong
parents:
diff changeset
     7
abstract class Bit
Chengsong
parents:
diff changeset
     8
case object Z extends Bit
Chengsong
parents:
diff changeset
     9
case object S extends Bit
12
768b833d6230 removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents: 11
diff changeset
    10
//case class C(c: Char) extends Bit
0
Chengsong
parents:
diff changeset
    11
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
abstract class Rexp 
Chengsong
parents:
diff changeset
    14
case object ZERO extends Rexp
Chengsong
parents:
diff changeset
    15
case object ONE extends Rexp
Chengsong
parents:
diff changeset
    16
case class CHAR(c: Char) extends Rexp
Chengsong
parents:
diff changeset
    17
case class ALTS(rs: List[Rexp]) extends Rexp 
Chengsong
parents:
diff changeset
    18
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    19
case class STAR(r: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    20
case class RECD(x: String, r: Rexp) extends Rexp
Chengsong
parents:
diff changeset
    21
Chengsong
parents:
diff changeset
    22
Chengsong
parents:
diff changeset
    23
Chengsong
parents:
diff changeset
    24
object Rexp{
Chengsong
parents:
diff changeset
    25
  type Bits = List[Bit]
Chengsong
parents:
diff changeset
    26
  // abbreviations
Chengsong
parents:
diff changeset
    27
  type Mon = (Char, Rexp)
Chengsong
parents:
diff changeset
    28
  type Lin = Set[Mon]
Chengsong
parents:
diff changeset
    29
  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
Chengsong
parents:
diff changeset
    30
  def PLUS(r: Rexp) = SEQ(r, STAR(r))
Chengsong
parents:
diff changeset
    31
  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
Chengsong
parents:
diff changeset
    32
Chengsong
parents:
diff changeset
    33
Chengsong
parents:
diff changeset
    34
  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
Chengsong
parents:
diff changeset
    35
    case Nil => Nil
Chengsong
parents:
diff changeset
    36
    case (x::xs) => {
Chengsong
parents:
diff changeset
    37
      val res = f(x)
Chengsong
parents:
diff changeset
    38
      if (acc.contains(res)) distinctBy(xs, f, acc)  
Chengsong
parents:
diff changeset
    39
      else x::distinctBy(xs, f, res::acc)
Chengsong
parents:
diff changeset
    40
    }
Chengsong
parents:
diff changeset
    41
  } 
Chengsong
parents:
diff changeset
    42
  // some convenience for typing in regular expressions
Chengsong
parents:
diff changeset
    43
  def charlist2rexp(s : List[Char]): Rexp = s match {
Chengsong
parents:
diff changeset
    44
    case Nil => ONE
Chengsong
parents:
diff changeset
    45
    case c::Nil => CHAR(c)
Chengsong
parents:
diff changeset
    46
    case c::s => SEQ(CHAR(c), charlist2rexp(s))
Chengsong
parents:
diff changeset
    47
  }
Chengsong
parents:
diff changeset
    48
  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
Chengsong
parents:
diff changeset
    49
Chengsong
parents:
diff changeset
    50
  implicit def RexpOps(r: Rexp) = new {
Chengsong
parents:
diff changeset
    51
    def | (s: Rexp) = ALT(r, s)
Chengsong
parents:
diff changeset
    52
    def % = STAR(r)
Chengsong
parents:
diff changeset
    53
    def ~ (s: Rexp) = SEQ(r, s)
Chengsong
parents:
diff changeset
    54
  }
Chengsong
parents:
diff changeset
    55
Chengsong
parents:
diff changeset
    56
  implicit def stringOps(s: String) = new {
Chengsong
parents:
diff changeset
    57
    def | (r: Rexp) = ALT(s, r)
Chengsong
parents:
diff changeset
    58
    def | (r: String) = ALT(s, r)
Chengsong
parents:
diff changeset
    59
    def % = STAR(s)
Chengsong
parents:
diff changeset
    60
    def ~ (r: Rexp) = SEQ(s, r)
Chengsong
parents:
diff changeset
    61
    def ~ (r: String) = SEQ(s, r)
Chengsong
parents:
diff changeset
    62
    def $ (r: Rexp) = RECD(s, r)
Chengsong
parents:
diff changeset
    63
  }
Chengsong
parents:
diff changeset
    64
Chengsong
parents:
diff changeset
    65
  // translation into ARexps
Chengsong
parents:
diff changeset
    66
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    67
    case AZERO => AZERO
Chengsong
parents:
diff changeset
    68
    case AONE(cs) => AONE(bs ++ cs)
Chengsong
parents:
diff changeset
    69
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
Chengsong
parents:
diff changeset
    70
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
Chengsong
parents:
diff changeset
    71
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
Chengsong
parents:
diff changeset
    72
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
Chengsong
parents:
diff changeset
    73
  }
Chengsong
parents:
diff changeset
    74
Chengsong
parents:
diff changeset
    75
  def internalise(r: Rexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    76
    case ZERO => AZERO
Chengsong
parents:
diff changeset
    77
    case ONE => AONE(Nil)
Chengsong
parents:
diff changeset
    78
    case CHAR(c) => ACHAR(Nil, c)
Chengsong
parents:
diff changeset
    79
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
    80
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
Chengsong
parents:
diff changeset
    81
    case ALTS(r1::rs) => {
Chengsong
parents:
diff changeset
    82
      val AALTS(Nil, rs2) = internalise(ALTS(rs))
Chengsong
parents:
diff changeset
    83
      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
Chengsong
parents:
diff changeset
    84
    }
Chengsong
parents:
diff changeset
    85
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
Chengsong
parents:
diff changeset
    86
    case STAR(r) => ASTAR(Nil, internalise(r))
Chengsong
parents:
diff changeset
    87
    case RECD(x, r) => internalise(r)
Chengsong
parents:
diff changeset
    88
  }
Chengsong
parents:
diff changeset
    89
Chengsong
parents:
diff changeset
    90
  internalise(("a" | "ab") ~ ("b" | ""))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
    91
0
Chengsong
parents:
diff changeset
    92
  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
Chengsong
parents:
diff changeset
    93
    case (ONE, bs) => (Empty, bs)
12
768b833d6230 removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents: 11
diff changeset
    94
    case (CHAR(f), bs) => (Chr(f), bs)
768b833d6230 removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents: 11
diff changeset
    95
    case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems only used when we simp a regex before derivatives and it contains things like alt("a")
0
Chengsong
parents:
diff changeset
    96
    case (ALTS(rs), bs) => bs match {
Chengsong
parents:
diff changeset
    97
      case Z::bs1 => {
Chengsong
parents:
diff changeset
    98
        val (v, bs2) = decode_aux(rs.head, bs1)
Chengsong
parents:
diff changeset
    99
        (Left(v), bs2)
Chengsong
parents:
diff changeset
   100
      }
Chengsong
parents:
diff changeset
   101
      case S::bs1 => {
Chengsong
parents:
diff changeset
   102
        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
Chengsong
parents:
diff changeset
   103
        (Right(v), bs2)			
Chengsong
parents:
diff changeset
   104
      }
Chengsong
parents:
diff changeset
   105
    }
Chengsong
parents:
diff changeset
   106
    case (SEQ(r1, r2), bs) => {
Chengsong
parents:
diff changeset
   107
      val (v1, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   108
      val (v2, bs2) = decode_aux(r2, bs1)
Chengsong
parents:
diff changeset
   109
      (Sequ(v1, v2), bs2)
Chengsong
parents:
diff changeset
   110
    }
Chengsong
parents:
diff changeset
   111
    case (STAR(r1), S::bs) => {
Chengsong
parents:
diff changeset
   112
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   113
      //println(v)
Chengsong
parents:
diff changeset
   114
      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
Chengsong
parents:
diff changeset
   115
      (Stars(v::vs), bs2)
Chengsong
parents:
diff changeset
   116
    }
Chengsong
parents:
diff changeset
   117
    case (STAR(_), Z::bs) => (Stars(Nil), bs)
Chengsong
parents:
diff changeset
   118
    case (RECD(x, r1), bs) => {
Chengsong
parents:
diff changeset
   119
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   120
      (Rec(x, v), bs1)
Chengsong
parents:
diff changeset
   121
    }
109
Chengsong
parents: 107
diff changeset
   122
    case (r, Nil) => (Stars(Nil), Nil)
0
Chengsong
parents:
diff changeset
   123
  }
Chengsong
parents:
diff changeset
   124
Chengsong
parents:
diff changeset
   125
  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
Chengsong
parents:
diff changeset
   126
    case (v, Nil) => v
Chengsong
parents:
diff changeset
   127
    case _ => throw new Exception("Not decodable")
Chengsong
parents:
diff changeset
   128
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   129
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   130
  def code(v: Val): Bits = v match {
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   131
    case Empty => Nil
12
768b833d6230 removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents: 11
diff changeset
   132
    case Chr(a) => Nil
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   133
    case Left(v) => Z :: code(v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   134
    case Right(v) => S :: code(v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   135
    case Sequ(v1, v2) => code(v1) ::: code(v2)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   136
    case Stars(Nil) => Z::Nil
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   137
    case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   138
  }
0
Chengsong
parents:
diff changeset
   139
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   140
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   141
  def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   142
    case (AONE(bs), Empty) => bs
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   143
    case (ACHAR(bs, c), Chr(d)) => bs
14
610f14009c0b the property
Chengsong
parents: 12
diff changeset
   144
    case (AALTS(bs, a::Nil), v) => bs ++ retrieve(a, v)
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   145
    case (AALTS(bs, as), Left(v)) => bs ++ retrieve(as.head,v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   146
    case (AALTS(bs, as), Right(v)) => bs ++ retrieve(AALTS(Nil,as.tail),v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   147
    case (ASEQ(bs, a1, a2), Sequ(v1, v2)) => bs ++ retrieve(a1, v1) ++ retrieve(a2, v2)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   148
    case (ASTAR(bs, a), Stars(Nil)) => bs ++ List(Z) 
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   149
    case (ASTAR(bs, a), Stars(v::vs)) => bs ++ List(S) ++ retrieve(a, v) ++ retrieve(ASTAR(Nil, a), Stars(vs))
59
8ff7b7508824 changes1
Chengsong
parents: 17
diff changeset
   150
  }//bug here last clause should not add list(S)
0
Chengsong
parents:
diff changeset
   151
  //erase function: extracts the regx from Aregex
Chengsong
parents:
diff changeset
   152
  def erase(r:ARexp): Rexp = r match{
Chengsong
parents:
diff changeset
   153
    case AZERO => ZERO
Chengsong
parents:
diff changeset
   154
    case AONE(_) => ONE
Chengsong
parents:
diff changeset
   155
    case ACHAR(bs, f) => CHAR(f)
Chengsong
parents:
diff changeset
   156
    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
Chengsong
parents:
diff changeset
   157
    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
Chengsong
parents:
diff changeset
   158
    case ASTAR(cs, r)=> STAR(erase(r))
Chengsong
parents:
diff changeset
   159
  }
Chengsong
parents:
diff changeset
   160
Chengsong
parents:
diff changeset
   161
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   162
  // nullable function: tests whether the regular 
Chengsong
parents:
diff changeset
   163
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   164
  def nullable (r: Rexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   165
    case ZERO => false
Chengsong
parents:
diff changeset
   166
    case ONE => true
Chengsong
parents:
diff changeset
   167
    case CHAR(_) => false
Chengsong
parents:
diff changeset
   168
    case ALTS(rs) => rs.exists(nullable)
Chengsong
parents:
diff changeset
   169
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
Chengsong
parents:
diff changeset
   170
    case STAR(_) => true
Chengsong
parents:
diff changeset
   171
    case RECD(_, r) => nullable(r)
Chengsong
parents:
diff changeset
   172
    //case PLUS(r) => nullable(r)
Chengsong
parents:
diff changeset
   173
  }
Chengsong
parents:
diff changeset
   174
Chengsong
parents:
diff changeset
   175
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   176
  def der (c: Char, r: Rexp) : Rexp = r match {
Chengsong
parents:
diff changeset
   177
    case ZERO => ZERO
Chengsong
parents:
diff changeset
   178
    case ONE => ZERO
Chengsong
parents:
diff changeset
   179
    case CHAR(f) => if (c == f) ONE else ZERO
Chengsong
parents:
diff changeset
   180
    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
Chengsong
parents:
diff changeset
   181
    case SEQ(r1, r2) => 
Chengsong
parents:
diff changeset
   182
      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
Chengsong
parents:
diff changeset
   183
      else SEQ(der(c, r1), r2)
Chengsong
parents:
diff changeset
   184
    case STAR(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   185
    case RECD(_, r1) => der(c, r1)
Chengsong
parents:
diff changeset
   186
    //case PLUS(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   187
  }
Chengsong
parents:
diff changeset
   188
92
Chengsong
parents: 59
diff changeset
   189
  def ders (s: List[Char], r: Rexp) : Rexp = s match {
Chengsong
parents: 59
diff changeset
   190
    case Nil => r
Chengsong
parents: 59
diff changeset
   191
    case c::s => ders(s, der(c, r))
Chengsong
parents: 59
diff changeset
   192
  }
Chengsong
parents: 59
diff changeset
   193
Chengsong
parents: 59
diff changeset
   194
def der_seqo(r:Rexp, s: List[Char],acc: List[Rexp]) : List[Rexp] = s match{
Chengsong
parents: 59
diff changeset
   195
    case Nil => acc ::: List(r)
Chengsong
parents: 59
diff changeset
   196
    case c::cs => der_seqo(der(c, r), cs, acc ::: List(r))
Chengsong
parents: 59
diff changeset
   197
  }
Chengsong
parents: 59
diff changeset
   198
  def der_seq_revo(r:Rexp, s: List[Char], acc: List[Rexp]): List[Rexp] = s match{
Chengsong
parents: 59
diff changeset
   199
    case Nil => r::acc
Chengsong
parents: 59
diff changeset
   200
    case c::cs =>der_seq_revo(r, cs, ders(s, r) :: acc  )
Chengsong
parents: 59
diff changeset
   201
  }
Chengsong
parents: 59
diff changeset
   202
  def re_closeo(l1: List[Rexp], l2: List[Rexp], re_init: Rexp): Rexp = l1 match {
Chengsong
parents: 59
diff changeset
   203
    case Nil => re_init
Chengsong
parents: 59
diff changeset
   204
    case c::cs => if(nullable(c)) re_closeo(cs, l2.tail, ALT(re_init,  l2.head)  ) 
Chengsong
parents: 59
diff changeset
   205
    else re_closeo(cs, l2.tail, re_init)
Chengsong
parents: 59
diff changeset
   206
  }
93
Chengsong
parents: 92
diff changeset
   207
92
Chengsong
parents: 59
diff changeset
   208
  //HERE
Chengsong
parents: 59
diff changeset
   209
  def closed_string_dero(r1: Rexp, r2: Rexp, s: List[Char]): Rexp = {
Chengsong
parents: 59
diff changeset
   210
    val l1 = der_seqo(r1, s, Nil)
Chengsong
parents: 59
diff changeset
   211
    val l2 = der_seq_revo(r2, s, Nil)
Chengsong
parents: 59
diff changeset
   212
    val Re = re_closeo((l1.reverse).tail, l2.tail, SEQ(l1.last, l2.head))
Chengsong
parents: 59
diff changeset
   213
    Re
Chengsong
parents: 59
diff changeset
   214
  }
Chengsong
parents: 59
diff changeset
   215
  //derivative w.r.t string
Chengsong
parents: 59
diff changeset
   216
def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
Chengsong
parents: 59
diff changeset
   217
  case (Nil, r) => r
Chengsong
parents: 59
diff changeset
   218
  case (s, ZERO) => ZERO
Chengsong
parents: 59
diff changeset
   219
  case (s, ONE) => if (s == Nil) ONE else ZERO
Chengsong
parents: 59
diff changeset
   220
  case (s, CHAR(c)) => if (s == List(c)) ONE else 
Chengsong
parents: 59
diff changeset
   221
                       if (s == Nil) CHAR(c) else ZERO
Chengsong
parents: 59
diff changeset
   222
  case (s, ALTS(List(r1, r2))) => ALT(ders2(s, r1), ders2(s, r2))
Chengsong
parents: 59
diff changeset
   223
  case (s, SEQ(r1, r2)) => closed_string_dero(r1, r2, s)
Chengsong
parents: 59
diff changeset
   224
  case (c::cs, STAR(r)) => closed_string_dero(der(c, r), STAR(r), cs)
Chengsong
parents: 59
diff changeset
   225
}
Chengsong
parents: 59
diff changeset
   226
0
Chengsong
parents:
diff changeset
   227
  def flatten(v: Val) : String = v match {
Chengsong
parents:
diff changeset
   228
    case Empty => ""
Chengsong
parents:
diff changeset
   229
    case Chr(c) => c.toString
Chengsong
parents:
diff changeset
   230
    case Left(v) => flatten(v)
Chengsong
parents:
diff changeset
   231
    case Right(v) => flatten(v)
Chengsong
parents:
diff changeset
   232
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
Chengsong
parents:
diff changeset
   233
    case Stars(vs) => vs.map(flatten).mkString
Chengsong
parents:
diff changeset
   234
    case Rec(_, v) => flatten(v)
Chengsong
parents:
diff changeset
   235
  }
Chengsong
parents:
diff changeset
   236
Chengsong
parents:
diff changeset
   237
  // extracts an environment from a value
Chengsong
parents:
diff changeset
   238
  def env(v: Val) : List[(String, String)] = v match {
Chengsong
parents:
diff changeset
   239
    case Empty => Nil
Chengsong
parents:
diff changeset
   240
    case Chr(c) => Nil
Chengsong
parents:
diff changeset
   241
    case Left(v) => env(v)
Chengsong
parents:
diff changeset
   242
    case Right(v) => env(v)
Chengsong
parents:
diff changeset
   243
    case Sequ(v1, v2) => env(v1) ::: env(v2)
Chengsong
parents:
diff changeset
   244
    case Stars(vs) => vs.flatMap(env)
Chengsong
parents:
diff changeset
   245
    case Rec(x, v) => (x, flatten(v))::env(v)
Chengsong
parents:
diff changeset
   246
  }
Chengsong
parents:
diff changeset
   247
Chengsong
parents:
diff changeset
   248
Chengsong
parents:
diff changeset
   249
  // injection part
Chengsong
parents:
diff changeset
   250
  def mkeps(r: Rexp) : Val = r match {
Chengsong
parents:
diff changeset
   251
    case ONE => Empty
Chengsong
parents:
diff changeset
   252
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
   253
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
Chengsong
parents:
diff changeset
   254
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
Chengsong
parents:
diff changeset
   255
    case STAR(r) => Stars(Nil)
Chengsong
parents:
diff changeset
   256
    case RECD(x, r) => Rec(x, mkeps(r))
Chengsong
parents:
diff changeset
   257
    //case PLUS(r) => Stars(List(mkeps(r)))
Chengsong
parents:
diff changeset
   258
  }
Chengsong
parents:
diff changeset
   259
Chengsong
parents:
diff changeset
   260
  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
Chengsong
parents:
diff changeset
   261
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   262
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   263
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   264
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
Chengsong
parents:
diff changeset
   265
    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
Chengsong
parents:
diff changeset
   266
    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
Chengsong
parents:
diff changeset
   267
    case (CHAR(_), Empty) => Chr(c) 
Chengsong
parents:
diff changeset
   268
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
Chengsong
parents:
diff changeset
   269
    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   270
  }
Chengsong
parents:
diff changeset
   271
  def lex(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   272
    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   273
    case c::cs => inj(r, c, lex(der(c, r), cs))
Chengsong
parents:
diff changeset
   274
  }
Chengsong
parents:
diff changeset
   275
Chengsong
parents:
diff changeset
   276
  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
Chengsong
parents:
diff changeset
   277
Chengsong
parents:
diff changeset
   278
  // some "rectification" functions for simplification
Chengsong
parents:
diff changeset
   279
  def F_ID(v: Val): Val = v
Chengsong
parents:
diff changeset
   280
  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
Chengsong
parents:
diff changeset
   281
  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
Chengsong
parents:
diff changeset
   282
  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   283
    case Right(v) => Right(f2(v))
Chengsong
parents:
diff changeset
   284
    case Left(v) => Left(f1(v))
Chengsong
parents:
diff changeset
   285
  }
Chengsong
parents:
diff changeset
   286
  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   287
    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
Chengsong
parents:
diff changeset
   288
  }
Chengsong
parents:
diff changeset
   289
  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   290
    (v:Val) => Sequ(f1(Empty), f2(v))
Chengsong
parents:
diff changeset
   291
  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   292
    (v:Val) => Sequ(f1(v), f2(Empty))
Chengsong
parents:
diff changeset
   293
  def F_RECD(f: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   294
    case Rec(x, v) => Rec(x, f(v))
Chengsong
parents:
diff changeset
   295
  }
Chengsong
parents:
diff changeset
   296
  def F_ERROR(v: Val): Val = throw new Exception("error")
Chengsong
parents:
diff changeset
   297
Chengsong
parents:
diff changeset
   298
  // simplification of regular expressions returning also an
Chengsong
parents:
diff changeset
   299
  // rectification function; no simplification under STAR 
Chengsong
parents:
diff changeset
   300
  def simp(r: Rexp): (Rexp, Val => Val) = r match {
Chengsong
parents:
diff changeset
   301
    case ALTS(List(r1, r2)) => {
Chengsong
parents:
diff changeset
   302
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   303
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   304
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   305
        case (ZERO, _) => (r2s, F_RIGHT(f2s))
Chengsong
parents:
diff changeset
   306
        case (_, ZERO) => (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   307
        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   308
                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
Chengsong
parents:
diff changeset
   309
      }
Chengsong
parents:
diff changeset
   310
    }
Chengsong
parents:
diff changeset
   311
    case SEQ(r1, r2) => {
Chengsong
parents:
diff changeset
   312
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   313
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   314
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   315
        case (ZERO, _) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   316
        case (_, ZERO) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   317
        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
Chengsong
parents:
diff changeset
   318
        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
Chengsong
parents:
diff changeset
   319
        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
Chengsong
parents:
diff changeset
   320
      }
Chengsong
parents:
diff changeset
   321
    }
Chengsong
parents:
diff changeset
   322
    case RECD(x, r1) => {
Chengsong
parents:
diff changeset
   323
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   324
      (RECD(x, r1s), F_RECD(f1s))
Chengsong
parents:
diff changeset
   325
    }
Chengsong
parents:
diff changeset
   326
    case r => (r, F_ID)
Chengsong
parents:
diff changeset
   327
  }
Chengsong
parents:
diff changeset
   328
  /*
Chengsong
parents:
diff changeset
   329
  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   330
  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   331
  */
Chengsong
parents:
diff changeset
   332
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   333
    case Nil => {
Chengsong
parents:
diff changeset
   334
      if (nullable(r)) {
Chengsong
parents:
diff changeset
   335
        mkeps(r) 
Chengsong
parents:
diff changeset
   336
      }
Chengsong
parents:
diff changeset
   337
      else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   338
    }
Chengsong
parents:
diff changeset
   339
    case c::cs => {
Chengsong
parents:
diff changeset
   340
      val (r_simp, f_simp) = simp(der(c, r))
Chengsong
parents:
diff changeset
   341
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
Chengsong
parents:
diff changeset
   342
    }
Chengsong
parents:
diff changeset
   343
  }
Chengsong
parents:
diff changeset
   344
Chengsong
parents:
diff changeset
   345
  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
Chengsong
parents:
diff changeset
   346
Chengsong
parents:
diff changeset
   347
  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
Chengsong
parents:
diff changeset
   348
Chengsong
parents:
diff changeset
   349
  // filters out all white spaces
Chengsong
parents:
diff changeset
   350
  def tokenise(r: Rexp, s: String) = 
Chengsong
parents:
diff changeset
   351
    env(lexing_simp(r, s)).filterNot { _._1 == "w"}
Chengsong
parents:
diff changeset
   352
Chengsong
parents:
diff changeset
   353
Chengsong
parents:
diff changeset
   354
  //reads the string from a file 
Chengsong
parents:
diff changeset
   355
  def fromFile(name: String) : String = 
Chengsong
parents:
diff changeset
   356
    io.Source.fromFile(name).mkString
Chengsong
parents:
diff changeset
   357
Chengsong
parents:
diff changeset
   358
  def tokenise_file(r: Rexp, name: String) = 
Chengsong
parents:
diff changeset
   359
    tokenise(r, fromFile(name))
Chengsong
parents:
diff changeset
   360
  
Chengsong
parents:
diff changeset
   361
  //   Testing
Chengsong
parents:
diff changeset
   362
  //============
Chengsong
parents:
diff changeset
   363
Chengsong
parents:
diff changeset
   364
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   365
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   366
    val result = code
Chengsong
parents:
diff changeset
   367
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   368
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   369
    result
Chengsong
parents:
diff changeset
   370
  }
Chengsong
parents:
diff changeset
   371
Chengsong
parents:
diff changeset
   372
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   373
Chengsong
parents:
diff changeset
   374
  // bnullable function: tests whether the aregular 
Chengsong
parents:
diff changeset
   375
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   376
  def bnullable (r: ARexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   377
    case AZERO => false
Chengsong
parents:
diff changeset
   378
    case AONE(_) => true
Chengsong
parents:
diff changeset
   379
    case ACHAR(_,_) => false
Chengsong
parents:
diff changeset
   380
    case AALTS(_, rs) => rs.exists(bnullable)
Chengsong
parents:
diff changeset
   381
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
Chengsong
parents:
diff changeset
   382
    case ASTAR(_, _) => true
Chengsong
parents:
diff changeset
   383
  }
Chengsong
parents:
diff changeset
   384
Chengsong
parents:
diff changeset
   385
  def mkepsBC(r: ARexp) : Bits = r match {
Chengsong
parents:
diff changeset
   386
    case AONE(bs) => bs
Chengsong
parents:
diff changeset
   387
    case AALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   388
      val n = rs.indexWhere(bnullable)
Chengsong
parents:
diff changeset
   389
      bs ++ mkepsBC(rs(n))
Chengsong
parents:
diff changeset
   390
    }
Chengsong
parents:
diff changeset
   391
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
Chengsong
parents:
diff changeset
   392
    case ASTAR(bs, r) => bs ++ List(Z)
Chengsong
parents:
diff changeset
   393
  }
Chengsong
parents:
diff changeset
   394
Chengsong
parents:
diff changeset
   395
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   396
  def bder(c: Char, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
   397
    case AZERO => AZERO
Chengsong
parents:
diff changeset
   398
    case AONE(_) => AZERO
12
768b833d6230 removed C(c) The retrieve and code in the previous version is still not correct and will crash. no prob now.
Chengsong
parents: 11
diff changeset
   399
    case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
0
Chengsong
parents:
diff changeset
   400
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
Chengsong
parents:
diff changeset
   401
    case ASEQ(bs, r1, r2) => 
Chengsong
parents:
diff changeset
   402
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
Chengsong
parents:
diff changeset
   403
      else ASEQ(bs, bder(c, r1), r2)
Chengsong
parents:
diff changeset
   404
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
Chengsong
parents:
diff changeset
   405
  }
107
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   406
  def bder_rf(c: Char, r: ARexp) : ARexp = r match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   407
    case AZERO => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   408
    case AONE(_) => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   409
    case ACHAR(bs, f) => if (c == f) AONE(bs) else AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   410
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder_rf(c, _)))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   411
    case ASEQ(bs, r1, r2) =>
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   412
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder_rf(c, r1), r2), fuse(mkepsBC(r1), bder_rf(c, r2)))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   413
      else ASEQ(bs, bder_rf(c, r1), r2)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   414
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder_rf(c, r)), ASTAR(Nil, r))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   415
  }
0
Chengsong
parents:
diff changeset
   416
  // derivative w.r.t. a string (iterates bder)
Chengsong
parents:
diff changeset
   417
  @tailrec
Chengsong
parents:
diff changeset
   418
  def bders (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   419
    case Nil => r
Chengsong
parents:
diff changeset
   420
    case c::s => bders(s, bder(c, r))
Chengsong
parents:
diff changeset
   421
  }
107
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   422
  
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   423
  def bders_rf(s: List[Char], r: ARexp) : ARexp = s match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   424
    case Nil => r
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   425
    case c::s => bders_rf(s, bder_rf(c, r))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   426
  }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   427
  def all_zero_except_alt(rs: List[ARexp], a: ARexp): ARexp = rs match{
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   428
    case Nil => a
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   429
    case AZERO :: rs1 => all_zero_except_alt(rs1, a)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   430
    case AALTS(bs, rs1) :: rs2 => {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   431
      if (a == AZERO)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   432
        all_zero_except_alt(rs2, AALTS(bs, rs1))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   433
      else
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   434
        AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   435
    }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   436
    case r1 :: rs2 => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   437
  }
0
Chengsong
parents:
diff changeset
   438
  def flats(rs: List[ARexp]): List[ARexp] = rs match {
Chengsong
parents:
diff changeset
   439
      case Nil => Nil
Chengsong
parents:
diff changeset
   440
      case AZERO :: rs1 => flats(rs1)
Chengsong
parents:
diff changeset
   441
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
Chengsong
parents:
diff changeset
   442
      case r1 :: rs2 => r1 :: flats(rs2)
15
Chengsong
parents: 14
diff changeset
   443
  }
17
Chengsong
parents: 16
diff changeset
   444
  /*
15
Chengsong
parents: 14
diff changeset
   445
  def remove(v: Val): Val = v match{
Chengsong
parents: 14
diff changeset
   446
    case Right(v1) => v1
Chengsong
parents: 14
diff changeset
   447
    case Left(v1) => v1
Chengsong
parents: 14
diff changeset
   448
    case _ => throw new Exception("Not removable")
17
Chengsong
parents: 16
diff changeset
   449
  }*/
15
Chengsong
parents: 14
diff changeset
   450
  def augment(v: Val, i: Int): Val = if(i > 1) augment(Right(v), i - 1) else Right(v)
Chengsong
parents: 14
diff changeset
   451
//an overly complex version
Chengsong
parents: 14
diff changeset
   452
/*
Chengsong
parents: 14
diff changeset
   453
    if(rel_dist >0){//the regex we are dealing with is not what v points at
Chengsong
parents: 14
diff changeset
   454
      rs match{
Chengsong
parents: 14
diff changeset
   455
        case Nil => throw new Exception("Trying to simplify a non-existent value")
Chengsong
parents: 14
diff changeset
   456
        case AZERO :: rs1 => flats_vsimp(rs1, rel_dist - 1, remove(v))
Chengsong
parents: 14
diff changeset
   457
        case AALTS(bs, rs1) :: rs2 => flats_vsimp(rs2, rel_dist - 1, augment(v, rs1.length - 1))//rs1 is guaranteed to have a len geq 2
Chengsong
parents: 14
diff changeset
   458
        case r1 :: rs2 => flats_vsimp(rs2, rel_dist - 1, v)
Chengsong
parents: 14
diff changeset
   459
      }
0
Chengsong
parents:
diff changeset
   460
    }
15
Chengsong
parents: 14
diff changeset
   461
    else if(rel_dist == 0){//we are dealing with regex v is pointing to -- "v->r itself"
Chengsong
parents: 14
diff changeset
   462
      rs match{//r1 cannot be zero
Chengsong
parents: 14
diff changeset
   463
        AALTS(bs, rs1) :: rs2 => flats_vsimp(  )
Chengsong
parents: 14
diff changeset
   464
        AZERO::rs2 => throw new Exception("Value corresponds to zero")
Chengsong
parents: 14
diff changeset
   465
        r1::rs2 => flats_vsimp(rs2, rel_dist - 1, v)
Chengsong
parents: 14
diff changeset
   466
      }
Chengsong
parents: 14
diff changeset
   467
Chengsong
parents: 14
diff changeset
   468
    }
Chengsong
parents: 14
diff changeset
   469
    else{
Chengsong
parents: 14
diff changeset
   470
Chengsong
parents: 14
diff changeset
   471
    }
Chengsong
parents: 14
diff changeset
   472
    */
Chengsong
parents: 14
diff changeset
   473
  def flats_vsimp(rs: List[ARexp], position: Int): Int = (rs, position) match {
Chengsong
parents: 14
diff changeset
   474
    case (_, 0) => 0
Chengsong
parents: 14
diff changeset
   475
    case (Nil, _) => 0
16
c51178fa85fe new version of slides
Chengsong
parents: 15
diff changeset
   476
    case (AZERO :: rs1, _) => flats_vsimp(rs1, position - 1) - 1
15
Chengsong
parents: 14
diff changeset
   477
    case (AALTS(bs, rs1) :: rs2, _) => rs1.length - 1 + flats_vsimp(rs2, position - 1)
Chengsong
parents: 14
diff changeset
   478
    case (r1 :: rs2, _) => flats_vsimp(rs2, position - 1)
Chengsong
parents: 14
diff changeset
   479
  }
0
Chengsong
parents:
diff changeset
   480
  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
Chengsong
parents:
diff changeset
   481
    case Nil => Nil
Chengsong
parents:
diff changeset
   482
    case ZERO :: rs1 => rflats(rs1)
Chengsong
parents:
diff changeset
   483
    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
Chengsong
parents:
diff changeset
   484
    case r1 :: rs2 => r1 :: rflats(rs2)
Chengsong
parents:
diff changeset
   485
  }
Chengsong
parents:
diff changeset
   486
  var flats_time = 0L
Chengsong
parents:
diff changeset
   487
  var dist_time = 0L
Chengsong
parents:
diff changeset
   488
  
Chengsong
parents:
diff changeset
   489
  def bsimp(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
   490
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
Chengsong
parents:
diff changeset
   491
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   492
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   493
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   494
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   495
    }
Chengsong
parents:
diff changeset
   496
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   497
      val rs_simp = rs.map(bsimp)
Chengsong
parents:
diff changeset
   498
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   499
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   500
      dist_res match {
Chengsong
parents:
diff changeset
   501
        case Nil => AZERO
93
Chengsong
parents: 92
diff changeset
   502
        case r :: Nil => fuse(bs1, r)
0
Chengsong
parents:
diff changeset
   503
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   504
      }
Chengsong
parents:
diff changeset
   505
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   506
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   507
    case r => r
Chengsong
parents:
diff changeset
   508
  }
107
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   509
  //minimise fuse operation if possible
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   510
  def bsimp_rf(r: ARexp):ARexp = r match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   511
     case ASEQ(bs1, r1, r2) => (bsimp_rf(r1), bsimp_rf(r2)) match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   512
        case (AZERO, _) => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   513
        case (_, AZERO) => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   514
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   515
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   516
    }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   517
    case AALTS(bs1, rs) => {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   518
      //after map simp, before flats, check if all others simplify to 0s, if yes, do not fuse
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   519
      val rs_simp = rs.map(bsimp_rf)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   520
      //prevent fuse from happening
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   521
      val fuse_alts = all_zero_except_alt(rs_simp, AZERO)//returns AZERO if not the case, return AALTS if yes
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   522
      if(fuse_alts == AZERO){
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   523
        val flat_res = flats(rs_simp)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   524
        val dist_res = distinctBy(flat_res, erase)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   525
        dist_res match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   526
          case Nil => AZERO
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   527
          case r :: Nil => fuse(bs1, r)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   528
          case rs => AALTS(bs1, rs)  
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   529
        }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   530
      }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   531
      else{
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   532
        fuse(bs1, fuse_alts)
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   533
      }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   534
    }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   535
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   536
    case r => r   
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   537
  }
93
Chengsong
parents: 92
diff changeset
   538
  //only print at the top level
Chengsong
parents: 92
diff changeset
   539
17
Chengsong
parents: 16
diff changeset
   540
  def find_pos(v: Val, rs: List[ARexp]): Int = (v, rs) match{
Chengsong
parents: 16
diff changeset
   541
    case (v, r::Nil) => 0
15
Chengsong
parents: 14
diff changeset
   542
    case (Right(v), r::rs) => find_pos(v, rs) + 1
17
Chengsong
parents: 16
diff changeset
   543
    case (Left(v), r::rs) => 0
Chengsong
parents: 16
diff changeset
   544
    //case (v, _) => 0
Chengsong
parents: 16
diff changeset
   545
  }
Chengsong
parents: 16
diff changeset
   546
  def find_pos_aux(v: Val, r: ARexp): Int = r match {
Chengsong
parents: 16
diff changeset
   547
    case AALTS(bs, rs) => find_pos(v, rs)
Chengsong
parents: 16
diff changeset
   548
    case r => 0
15
Chengsong
parents: 14
diff changeset
   549
  }
17
Chengsong
parents: 16
diff changeset
   550
  def remove(v: Val, rs: List[ARexp]) : Val = (v,rs) match {//remove the outmost layer of ALTS's Left and Right
Chengsong
parents: 16
diff changeset
   551
    //we have to use r to detect the bound of nested L/Rs
Chengsong
parents: 16
diff changeset
   552
    case (v, r::Nil) => v
Chengsong
parents: 16
diff changeset
   553
    case (Right(v), r::rs) => remove(v, rs) 
15
Chengsong
parents: 14
diff changeset
   554
    case (Left(v), r::rs) => v 
17
Chengsong
parents: 16
diff changeset
   555
    //case (v, r::Nil) => v
15
Chengsong
parents: 14
diff changeset
   556
  }
16
c51178fa85fe new version of slides
Chengsong
parents: 15
diff changeset
   557
  def simple_end(v: Val): Boolean = v match {
15
Chengsong
parents: 14
diff changeset
   558
    case Left(v) => return false
Chengsong
parents: 14
diff changeset
   559
    case Right(v) => return simple_end(v)
Chengsong
parents: 14
diff changeset
   560
    case v => return true
Chengsong
parents: 14
diff changeset
   561
  }
17
Chengsong
parents: 16
diff changeset
   562
  def isend(v: Val, rs: List[ARexp], position: Int): Boolean = {//TODO: here the slice api i am not familiar with so this call might be incorrect and crash the bsimp2
Chengsong
parents: 16
diff changeset
   563
    val rsbh = rs.slice(position + 1, rs.length)
15
Chengsong
parents: 14
diff changeset
   564
    val out_end = if(flats(rsbh) == Nil) true else false
Chengsong
parents: 14
diff changeset
   565
    val inner_end = simple_end(v)
Chengsong
parents: 14
diff changeset
   566
    inner_end && out_end
Chengsong
parents: 14
diff changeset
   567
  }
16
c51178fa85fe new version of slides
Chengsong
parents: 15
diff changeset
   568
  def get_coat(v: Val, rs: List[Rexp], vs: Val): Val = (v, rs) match{//the dual operation of remove(so-called by myself)
15
Chengsong
parents: 14
diff changeset
   569
    case (Right(v), r::Nil) => Right(vs)
Chengsong
parents: 14
diff changeset
   570
    case (Left(v), r::rs) => Left(vs) 
Chengsong
parents: 14
diff changeset
   571
    case (Right(v), r::rs) => Right(get_coat(v, rs, vs))
Chengsong
parents: 14
diff changeset
   572
  }
Chengsong
parents: 14
diff changeset
   573
  def coat(v: Val, i: Int) : Val = i match {
Chengsong
parents: 14
diff changeset
   574
    case 0 => v
Chengsong
parents: 14
diff changeset
   575
    case i => coat(Right(v), i - 1)
Chengsong
parents: 14
diff changeset
   576
  }
17
Chengsong
parents: 16
diff changeset
   577
  //This version takes a regex and a value, return a simplified regex and its corresponding simplified value 
Chengsong
parents: 16
diff changeset
   578
  def bsimp2(r: ARexp, v: Val): (ARexp, Val) = (r,v) match{
Chengsong
parents: 16
diff changeset
   579
    case (ASEQ(bs1, r1, r2), Sequ(v1, v2)) => (bsimp2(r1, v1), bsimp2(r2, v2)) match {
Chengsong
parents: 16
diff changeset
   580
        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
Chengsong
parents: 16
diff changeset
   581
        case ((_, _), (AZERO, _)) => (AZERO, undefined)
Chengsong
parents: 16
diff changeset
   582
        case ((AONE(bs2), v1s) , (r2s, v2s)) => (fuse(bs1 ++ bs2, r2s), v2s )//v2 tells how to retrieve bits in r2s, which is enough as the bits of the first part of the sequence has already been integrated to the top level of the second regx.
Chengsong
parents: 16
diff changeset
   583
        case ((r1s, v1s), (r2s, v2s)) => (ASEQ(bs1, r1s, r2s),  Sequ(v1s, v2s))
Chengsong
parents: 16
diff changeset
   584
    }
Chengsong
parents: 16
diff changeset
   585
    case (AALTS(bs1, rs), v) => {
Chengsong
parents: 16
diff changeset
   586
      //phase 1 transformation so that aalts(bs1, rs) => aalts(bs1, rsf) and v => vf
Chengsong
parents: 16
diff changeset
   587
      val init_ind = find_pos(v, rs)
Chengsong
parents: 16
diff changeset
   588
      val vs = bsimp2(rs(init_ind), remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
Chengsong
parents: 16
diff changeset
   589
      //println(vs)
Chengsong
parents: 16
diff changeset
   590
      val rs_simp = rs.map(bsimp)
Chengsong
parents: 16
diff changeset
   591
      val vs_kernel = rs_simp(init_ind) match {
Chengsong
parents: 16
diff changeset
   592
        case AALTS(bs2, rs2) => remove(vs._2, rs2)//remove the secondary layer of left and right
Chengsong
parents: 16
diff changeset
   593
        case r => vs._2
Chengsong
parents: 16
diff changeset
   594
      }
Chengsong
parents: 16
diff changeset
   595
      val flat_res = flats(rs_simp)
Chengsong
parents: 16
diff changeset
   596
      val vs_for_coating = if(isend(vs._2, rs_simp, init_ind)||flat_res.length == 1) vs_kernel else Left(vs_kernel)
Chengsong
parents: 16
diff changeset
   597
      val r_s = rs_simp(init_ind)//or vs._1
Chengsong
parents: 16
diff changeset
   598
      val shift = flats_vsimp(rs_simp, init_ind) + find_pos_aux(vs._2, rs_simp(init_ind))
Chengsong
parents: 16
diff changeset
   599
      val new_ind = init_ind + shift
Chengsong
parents: 16
diff changeset
   600
      val vf = coat(vs_for_coating, new_ind)
Chengsong
parents: 16
diff changeset
   601
      //flats2 returns a list of regex and a single v
Chengsong
parents: 16
diff changeset
   602
      //now |- vf: ALTS(bs1, flat_res)
Chengsong
parents: 16
diff changeset
   603
      //phase 2 transformation so that aalts(bs1, rsf) => aalts(bs, rsdb) and vf => vdb
Chengsong
parents: 16
diff changeset
   604
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents: 16
diff changeset
   605
      val front_part = distinctBy(flat_res.slice(0, new_ind + 1), erase)
Chengsong
parents: 16
diff changeset
   606
      //val size_reduction = new_ind + 1 - front_part.length
Chengsong
parents: 16
diff changeset
   607
      val vdb = if(dist_res.length == front_part.length )//that means the regex we are interested in is at the end of the list
Chengsong
parents: 16
diff changeset
   608
      {
Chengsong
parents: 16
diff changeset
   609
        coat(vs_kernel, front_part.length - 1)
Chengsong
parents: 16
diff changeset
   610
      }
Chengsong
parents: 16
diff changeset
   611
      else{
Chengsong
parents: 16
diff changeset
   612
        coat(Left(vs_kernel), front_part.length - 1)
Chengsong
parents: 16
diff changeset
   613
      }
Chengsong
parents: 16
diff changeset
   614
      //println(vdb)
Chengsong
parents: 16
diff changeset
   615
      //we don't need to transform vdb as this phase will not make enough changes to the regex to affect value.
Chengsong
parents: 16
diff changeset
   616
      //the above statement needs verification. but can be left as is now.
Chengsong
parents: 16
diff changeset
   617
      dist_res match {
Chengsong
parents: 16
diff changeset
   618
        case Nil => (AZERO, undefined)
Chengsong
parents: 16
diff changeset
   619
        case s :: Nil => (fuse(bs1, s), vdb)
Chengsong
parents: 16
diff changeset
   620
        case rs => (AALTS(bs1, rs), vdb)
Chengsong
parents: 16
diff changeset
   621
      }
Chengsong
parents: 16
diff changeset
   622
    }
Chengsong
parents: 16
diff changeset
   623
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
Chengsong
parents: 16
diff changeset
   624
    case (r, v) => (r, v)  
Chengsong
parents: 16
diff changeset
   625
  }
Chengsong
parents: 16
diff changeset
   626
  def vsimp(r: ARexp, v: Val): Val = bsimp2(r, v)._2
Chengsong
parents: 16
diff changeset
   627
  /*This version was first intended for returning a function however a value would be simpler.
15
Chengsong
parents: 14
diff changeset
   628
  def bsimp2(r: ARexp, v: Val): (ARexp, Val => Val) = (r,v) match{
Chengsong
parents: 14
diff changeset
   629
    case (ASEQ(bs1, r1, r2), v) => (bsimp2(r1), bsimp2(r2)) match {
Chengsong
parents: 14
diff changeset
   630
        case ((AZERO, _), (_, _) )=> (AZERO, undefined)
Chengsong
parents: 14
diff changeset
   631
        case ((_, _), (AZERO, _)) => (AZERO, undefined)
Chengsong
parents: 14
diff changeset
   632
        case ((AONE(bs2), f1) , (r2s, f2)) => (fuse(bs1 ++ bs2, r2s), lambda v => v match { case Sequ(_, v) => f2(v) } )
Chengsong
parents: 14
diff changeset
   633
        case ((r1s, f1), (r2s, f2)) => (ASEQ(bs1, r1s, r2s), lambda v => v match {case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))}
Chengsong
parents: 14
diff changeset
   634
    }
Chengsong
parents: 14
diff changeset
   635
    case AALTS(bs1, rs) => {
Chengsong
parents: 14
diff changeset
   636
      val init_ind = find_pos(v, rs)
Chengsong
parents: 14
diff changeset
   637
      val vs = bsimp2(rs[init_ind], remove(v, rs))//remove all the outer layers of left and right in v to  match the regx rs[i]
Chengsong
parents: 14
diff changeset
   638
      val rs_simp = rs.map(bsimp)
Chengsong
parents: 14
diff changeset
   639
      val vs_kernel = rs_simp[init_ind] match {
Chengsong
parents: 14
diff changeset
   640
        case AALTS(bs2, rs2) => remove(vs, rs_simp[init_ind])//remove the secondary layer of left and right
Chengsong
parents: 14
diff changeset
   641
        case r => vs
Chengsong
parents: 14
diff changeset
   642
      }
Chengsong
parents: 14
diff changeset
   643
      val vs_for_coating = if(isend(vs, rs_simp, init_ind)) vs_kernel else Left(vs_kernel)
Chengsong
parents: 14
diff changeset
   644
Chengsong
parents: 14
diff changeset
   645
      val r_s = rs_simp[init_ind]
Chengsong
parents: 14
diff changeset
   646
      val shift = flats_vsimp(vs, rs_simp, init_ind) + find_pos(vs, rs_simp[init_ind])
Chengsong
parents: 14
diff changeset
   647
      val vf = coat(vs_for_coating, shift + init_ind)
Chengsong
parents: 14
diff changeset
   648
Chengsong
parents: 14
diff changeset
   649
      val flat_res = flats(rs_simp)//flats2 returns a list of regex and a single v
Chengsong
parents: 14
diff changeset
   650
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents: 14
diff changeset
   651
      dist_res match {
Chengsong
parents: 14
diff changeset
   652
        case Nil => AZERO
Chengsong
parents: 14
diff changeset
   653
        case s :: Nil => fuse(bs1, s)
Chengsong
parents: 14
diff changeset
   654
        case rs => AALTS(bs1, rs)  
Chengsong
parents: 14
diff changeset
   655
      }
Chengsong
parents: 14
diff changeset
   656
    }
Chengsong
parents: 14
diff changeset
   657
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
Chengsong
parents: 14
diff changeset
   658
    case r => r  
16
c51178fa85fe new version of slides
Chengsong
parents: 15
diff changeset
   659
  }*/
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   660
  def super_bsimp(r: ARexp): ARexp = r match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   661
    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
0
Chengsong
parents:
diff changeset
   662
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   663
        case (_, AZERO) => AZERO
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   664
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)//万一是(r1, alts(rs))这种形式呢
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   665
        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)} ) ) 
0
Chengsong
parents:
diff changeset
   666
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   667
    }
Chengsong
parents:
diff changeset
   668
    case AALTS(bs1, rs) => {
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   669
      val rs_simp = rs.map(super_bsimp)
0
Chengsong
parents:
diff changeset
   670
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   671
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   672
      dist_res match {
Chengsong
parents:
diff changeset
   673
        case Nil => AZERO
Chengsong
parents:
diff changeset
   674
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   675
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   676
      }
Chengsong
parents:
diff changeset
   677
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   678
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   679
    case r => r
Chengsong
parents:
diff changeset
   680
  }
Chengsong
parents:
diff changeset
   681
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   682
0
Chengsong
parents:
diff changeset
   683
  def simp_weakened(r: Rexp): Rexp = r match {
Chengsong
parents:
diff changeset
   684
    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
Chengsong
parents:
diff changeset
   685
        case (ZERO, _) => ZERO
Chengsong
parents:
diff changeset
   686
        case (_, ZERO) => ZERO
Chengsong
parents:
diff changeset
   687
        case (ONE, r2s) => r2s
Chengsong
parents:
diff changeset
   688
        case (r1s, r2s) => SEQ(r1s, r2s)
Chengsong
parents:
diff changeset
   689
    }
Chengsong
parents:
diff changeset
   690
    case ALTS(rs) => {
Chengsong
parents:
diff changeset
   691
      val rs_simp = rs.map(simp_weakened)
Chengsong
parents:
diff changeset
   692
      val flat_res = rflats(rs_simp)
Chengsong
parents:
diff changeset
   693
      val dist_res = rs_simp.distinct
Chengsong
parents:
diff changeset
   694
      dist_res match {
Chengsong
parents:
diff changeset
   695
        case Nil => ZERO
Chengsong
parents:
diff changeset
   696
        case s :: Nil => s
Chengsong
parents:
diff changeset
   697
        case rs => ALTS(rs)  
Chengsong
parents:
diff changeset
   698
      }
Chengsong
parents:
diff changeset
   699
    }
Chengsong
parents:
diff changeset
   700
    case STAR(r) => STAR(simp_weakened(r))
Chengsong
parents:
diff changeset
   701
    case r => r
Chengsong
parents:
diff changeset
   702
  }
Chengsong
parents:
diff changeset
   703
    
Chengsong
parents:
diff changeset
   704
  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   705
    case Nil => r
Chengsong
parents:
diff changeset
   706
    case c::s => bders_simp(s, bsimp(bder(c, r)))
Chengsong
parents:
diff changeset
   707
  }
107
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   708
  def bders_simp_rf (s: List[Char], r: ARexp) : ARexp = s match {
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   709
    case Nil => r
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   710
    case c::s => bders_simp_rf(s, bsimp_rf(bder(c, r)))
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   711
  }
b1e365afa29c changes
Chengsong
parents: 93
diff changeset
   712
  
14
610f14009c0b the property
Chengsong
parents: 12
diff changeset
   713
  //----------------------------------------------------------------------------experiment bsimp
610f14009c0b the property
Chengsong
parents: 12
diff changeset
   714
  /*
610f14009c0b the property
Chengsong
parents: 12
diff changeset
   715
0
Chengsong
parents:
diff changeset
   716
  */
Chengsong
parents:
diff changeset
   717
  /*
Chengsong
parents:
diff changeset
   718
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   719
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   720
    val result = code
Chengsong
parents:
diff changeset
   721
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   722
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   723
    result
Chengsong
parents:
diff changeset
   724
  }
Chengsong
parents:
diff changeset
   725
  */
Chengsong
parents:
diff changeset
   726
  // main unsimplified lexing function (produces a value)
Chengsong
parents:
diff changeset
   727
  def blex(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   728
    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   729
    case c::cs => {
Chengsong
parents:
diff changeset
   730
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   731
      blex(der_res, cs)
Chengsong
parents:
diff changeset
   732
    }
Chengsong
parents:
diff changeset
   733
  }
Chengsong
parents:
diff changeset
   734
Chengsong
parents:
diff changeset
   735
  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
14
610f14009c0b the property
Chengsong
parents: 12
diff changeset
   736
  def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
0
Chengsong
parents:
diff changeset
   737
Chengsong
parents:
diff changeset
   738
  var bder_time = 0L
Chengsong
parents:
diff changeset
   739
  var bsimp_time = 0L
Chengsong
parents:
diff changeset
   740
  var mkepsBC_time = 0L
Chengsong
parents:
diff changeset
   741
  var small_de = 2
Chengsong
parents:
diff changeset
   742
  var big_de = 5
Chengsong
parents:
diff changeset
   743
  var usual_de = 3
Chengsong
parents:
diff changeset
   744
  
Chengsong
parents:
diff changeset
   745
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   746
    case Nil => {
Chengsong
parents:
diff changeset
   747
      if (bnullable(r)) {
Chengsong
parents:
diff changeset
   748
        //println(asize(r))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   749
        mkepsBC(r)
0
Chengsong
parents:
diff changeset
   750
      }
Chengsong
parents:
diff changeset
   751
    else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   752
    }
Chengsong
parents:
diff changeset
   753
    case c::cs => {
Chengsong
parents:
diff changeset
   754
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   755
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   756
      blex_simp(simp_res, cs)      
Chengsong
parents:
diff changeset
   757
    }
Chengsong
parents:
diff changeset
   758
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   759
  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   760
    case Nil => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   761
      if (bnullable(r)) {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   762
        mkepsBC(r)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   763
      }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   764
      else throw new Exception("Not matched")
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   765
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   766
    case c::cs => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   767
      super_blex_simp(super_bsimp(bder(c,r)), cs)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   768
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   769
  }
0
Chengsong
parents:
diff changeset
   770
  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
Chengsong
parents:
diff changeset
   771
    case Nil => r
Chengsong
parents:
diff changeset
   772
    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
Chengsong
parents:
diff changeset
   773
  }
Chengsong
parents:
diff changeset
   774
Chengsong
parents:
diff changeset
   775
Chengsong
parents:
diff changeset
   776
  //size: of a Aregx for testing purposes 
Chengsong
parents:
diff changeset
   777
  def size(r: Rexp) : Int = r match {
Chengsong
parents:
diff changeset
   778
    case ZERO => 1
Chengsong
parents:
diff changeset
   779
    case ONE => 1
Chengsong
parents:
diff changeset
   780
    case CHAR(_) => 1
Chengsong
parents:
diff changeset
   781
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
Chengsong
parents:
diff changeset
   782
    case ALTS(rs) => 1 + rs.map(size).sum
Chengsong
parents:
diff changeset
   783
    case STAR(r) => 1 + size(r)
Chengsong
parents:
diff changeset
   784
  }
Chengsong
parents:
diff changeset
   785
Chengsong
parents:
diff changeset
   786
  def asize(a: ARexp) = size(erase(a))
Chengsong
parents:
diff changeset
   787
Chengsong
parents:
diff changeset
   788
Chengsong
parents:
diff changeset
   789
  // decoding does not work yet
Chengsong
parents:
diff changeset
   790
  def blexing_simp(r: Rexp, s: String) = {
Chengsong
parents:
diff changeset
   791
    val bit_code = blex_simp(internalise(r), s.toList)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   792
    decode(r, bit_code) 
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   793
  }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   794
  def super_blexing_simp(r: Rexp, s: String) = {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   795
    decode(r, super_blex_simp(internalise(r), s.toList))
0
Chengsong
parents:
diff changeset
   796
  }
Chengsong
parents:
diff changeset
   797
Chengsong
parents:
diff changeset
   798
Chengsong
parents:
diff changeset
   799
Chengsong
parents:
diff changeset
   800
Chengsong
parents:
diff changeset
   801
Chengsong
parents:
diff changeset
   802
  // Lexing Rules for a Small While Language
Chengsong
parents:
diff changeset
   803
Chengsong
parents:
diff changeset
   804
  //symbols
Chengsong
parents:
diff changeset
   805
  /*
Chengsong
parents:
diff changeset
   806
  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
Chengsong
parents:
diff changeset
   807
  
Chengsong
parents:
diff changeset
   808
  //digits
Chengsong
parents:
diff changeset
   809
  val DIGIT = PRED("0123456789".contains(_))
Chengsong
parents:
diff changeset
   810
  //identifiers
Chengsong
parents:
diff changeset
   811
  val ID = SYM ~ (SYM | DIGIT).% 
Chengsong
parents:
diff changeset
   812
  //numbers
Chengsong
parents:
diff changeset
   813
  val NUM = STAR(DIGIT)
Chengsong
parents:
diff changeset
   814
  //keywords
Chengsong
parents:
diff changeset
   815
  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
Chengsong
parents:
diff changeset
   816
  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
Chengsong
parents:
diff changeset
   817
  //semicolons
Chengsong
parents:
diff changeset
   818
  val SEMI: Rexp = ";"
Chengsong
parents:
diff changeset
   819
  //operators
Chengsong
parents:
diff changeset
   820
  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
Chengsong
parents:
diff changeset
   821
  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
Chengsong
parents:
diff changeset
   822
  //whitespaces
Chengsong
parents:
diff changeset
   823
  val WHITESPACE = PLUS(" " | "\n" | "\t")
Chengsong
parents:
diff changeset
   824
  //parentheses
Chengsong
parents:
diff changeset
   825
  val RPAREN: Rexp = ")"
Chengsong
parents:
diff changeset
   826
  val LPAREN: Rexp = "("
Chengsong
parents:
diff changeset
   827
  val BEGIN: Rexp = "{"
Chengsong
parents:
diff changeset
   828
  val END: Rexp = "}"
Chengsong
parents:
diff changeset
   829
  //strings...but probably needs not
Chengsong
parents:
diff changeset
   830
  val STRING: Rexp = "\"" ~ SYM.% ~ "\""
Chengsong
parents:
diff changeset
   831
Chengsong
parents:
diff changeset
   832
Chengsong
parents:
diff changeset
   833
Chengsong
parents:
diff changeset
   834
  val WHILE_REGS = (("k" $ KEYWORD) | 
Chengsong
parents:
diff changeset
   835
                    ("i" $ ID) | 
Chengsong
parents:
diff changeset
   836
                    ("o" $ OP) | 
Chengsong
parents:
diff changeset
   837
                    ("n" $ NUM) | 
Chengsong
parents:
diff changeset
   838
                    ("s" $ SEMI) | 
Chengsong
parents:
diff changeset
   839
                    ("str" $ STRING) |
Chengsong
parents:
diff changeset
   840
                    ("p" $ (LPAREN | RPAREN)) | 
Chengsong
parents:
diff changeset
   841
                    ("b" $ (BEGIN | END)) | 
Chengsong
parents:
diff changeset
   842
                    ("w" $ WHITESPACE)).%
Chengsong
parents:
diff changeset
   843
Chengsong
parents:
diff changeset
   844
  val AWHILE_REGS = (
Chengsong
parents:
diff changeset
   845
    ALTS(
Chengsong
parents:
diff changeset
   846
      List(
Chengsong
parents:
diff changeset
   847
        ("k" $ AKEYWORD), 
Chengsong
parents:
diff changeset
   848
                    ("i" $ ID),
Chengsong
parents:
diff changeset
   849
                    ("o" $ AOP) , 
Chengsong
parents:
diff changeset
   850
                    ("n" $ NUM) ,
Chengsong
parents:
diff changeset
   851
                    ("s" $ SEMI) ,
Chengsong
parents:
diff changeset
   852
                    ("str" $ STRING), 
Chengsong
parents:
diff changeset
   853
                    ("p" $ (LPAREN | RPAREN)), 
Chengsong
parents:
diff changeset
   854
                    ("b" $ (BEGIN | END)), 
Chengsong
parents:
diff changeset
   855
                    ("w" $ WHITESPACE)
Chengsong
parents:
diff changeset
   856
      )
Chengsong
parents:
diff changeset
   857
    )
Chengsong
parents:
diff changeset
   858
  ).%
Chengsong
parents:
diff changeset
   859
Chengsong
parents:
diff changeset
   860
*/
Chengsong
parents:
diff changeset
   861
Chengsong
parents:
diff changeset
   862
Chengsong
parents:
diff changeset
   863
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   864
  /*
Chengsong
parents:
diff changeset
   865
  // Two Simple While programs
Chengsong
parents:
diff changeset
   866
  //========================
Chengsong
parents:
diff changeset
   867
  println("prog0 test")
Chengsong
parents:
diff changeset
   868
Chengsong
parents:
diff changeset
   869
  val prog0 = """read n"""
Chengsong
parents:
diff changeset
   870
  println(env(lexing_simp(WHILE_REGS, prog0)))
Chengsong
parents:
diff changeset
   871
  println(tokenise(WHILE_REGS, prog0))
Chengsong
parents:
diff changeset
   872
Chengsong
parents:
diff changeset
   873
  println("prog1 test")
Chengsong
parents:
diff changeset
   874
Chengsong
parents:
diff changeset
   875
  val prog1 = """read  n; write (n)"""
Chengsong
parents:
diff changeset
   876
  println(tokenise(WHILE_REGS, prog1))
Chengsong
parents:
diff changeset
   877
Chengsong
parents:
diff changeset
   878
  */
Chengsong
parents:
diff changeset
   879
  // Bigger Tests
Chengsong
parents:
diff changeset
   880
  //==============
Chengsong
parents:
diff changeset
   881
Chengsong
parents:
diff changeset
   882
  def escape(raw: String): String = {
Chengsong
parents:
diff changeset
   883
    import scala.reflect.runtime.universe._
Chengsong
parents:
diff changeset
   884
    Literal(Constant(raw)).toString
Chengsong
parents:
diff changeset
   885
  }
Chengsong
parents:
diff changeset
   886
Chengsong
parents:
diff changeset
   887
  val prog2 = """
Chengsong
parents:
diff changeset
   888
  write "Fib";
Chengsong
parents:
diff changeset
   889
  read n;
Chengsong
parents:
diff changeset
   890
  minus1 := 0;
Chengsong
parents:
diff changeset
   891
  minus2 := 1;
Chengsong
parents:
diff changeset
   892
  while n > 0 do {
Chengsong
parents:
diff changeset
   893
    temp := minus2;
Chengsong
parents:
diff changeset
   894
    minus2 := minus1 + minus2;
Chengsong
parents:
diff changeset
   895
    minus1 := temp;
Chengsong
parents:
diff changeset
   896
    n := n - 1
Chengsong
parents:
diff changeset
   897
  };
Chengsong
parents:
diff changeset
   898
  write "Result";
Chengsong
parents:
diff changeset
   899
  write minus2
Chengsong
parents:
diff changeset
   900
  """
Chengsong
parents:
diff changeset
   901
Chengsong
parents:
diff changeset
   902
  val prog3 = """
Chengsong
parents:
diff changeset
   903
  start := 1000;
Chengsong
parents:
diff changeset
   904
  x := start;
Chengsong
parents:
diff changeset
   905
  y := start;
Chengsong
parents:
diff changeset
   906
  z := start;
Chengsong
parents:
diff changeset
   907
  while 0 < x do {
Chengsong
parents:
diff changeset
   908
  while 0 < y do {
Chengsong
parents:
diff changeset
   909
    while 0 < z do {
Chengsong
parents:
diff changeset
   910
      z := z - 1
Chengsong
parents:
diff changeset
   911
    };
Chengsong
parents:
diff changeset
   912
    z := start;
Chengsong
parents:
diff changeset
   913
    y := y - 1
Chengsong
parents:
diff changeset
   914
  };     
Chengsong
parents:
diff changeset
   915
  y := start;
Chengsong
parents:
diff changeset
   916
  x := x - 1
Chengsong
parents:
diff changeset
   917
  }
Chengsong
parents:
diff changeset
   918
  """
Chengsong
parents:
diff changeset
   919
  /*
Chengsong
parents:
diff changeset
   920
  for(i <- 400 to 400 by 1){
Chengsong
parents:
diff changeset
   921
    println(i+":")
Chengsong
parents:
diff changeset
   922
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   923
  } */
Chengsong
parents:
diff changeset
   924
Chengsong
parents:
diff changeset
   925
    /*
Chengsong
parents:
diff changeset
   926
    for (i <- 2 to 5){
Chengsong
parents:
diff changeset
   927
      for(j <- 1 to 3){
Chengsong
parents:
diff changeset
   928
        println(i,j)
Chengsong
parents:
diff changeset
   929
        small_de = i
Chengsong
parents:
diff changeset
   930
        usual_de = i + j
Chengsong
parents:
diff changeset
   931
        big_de = i + 2*j 
Chengsong
parents:
diff changeset
   932
        blexing_simp(AWHILE_REGS, prog2 * 100)
Chengsong
parents:
diff changeset
   933
      }
Chengsong
parents:
diff changeset
   934
    }*/
Chengsong
parents:
diff changeset
   935
Chengsong
parents:
diff changeset
   936
  /*
Chengsong
parents:
diff changeset
   937
  println("Tokens of prog2")
Chengsong
parents:
diff changeset
   938
  println(tokenise(WHILE_REGS, prog2).mkString("\n"))
Chengsong
parents:
diff changeset
   939
Chengsong
parents:
diff changeset
   940
  val fib_tokens = tokenise(WHILE_REGS, prog2)
Chengsong
parents:
diff changeset
   941
  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   942
Chengsong
parents:
diff changeset
   943
Chengsong
parents:
diff changeset
   944
  val test_tokens = tokenise(WHILE_REGS, prog3)
Chengsong
parents:
diff changeset
   945
  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   946
  */
Chengsong
parents:
diff changeset
   947
Chengsong
parents:
diff changeset
   948
  /*
Chengsong
parents:
diff changeset
   949
  println("time test for blexing_simp")
Chengsong
parents:
diff changeset
   950
  for (i <- 1 to 1 by 1) {
Chengsong
parents:
diff changeset
   951
    lexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   952
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   953
    for( j <- 0 to each_simp_timeb.length - 1){
Chengsong
parents:
diff changeset
   954
      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
Chengsong
parents:
diff changeset
   955
        println(j, each_simp_timeb(j), each_simp_time(j))
Chengsong
parents:
diff changeset
   956
    }
Chengsong
parents:
diff changeset
   957
  }
Chengsong
parents:
diff changeset
   958
  */
Chengsong
parents:
diff changeset
   959
Chengsong
parents:
diff changeset
   960
Chengsong
parents:
diff changeset
   961
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   962
Chengsong
parents:
diff changeset
   963
Chengsong
parents:
diff changeset
   964
Chengsong
parents:
diff changeset
   965
  def clear() = {
Chengsong
parents:
diff changeset
   966
    print("")
Chengsong
parents:
diff changeset
   967
    //print("\33[H\33[2J")
Chengsong
parents:
diff changeset
   968
  }
Chengsong
parents:
diff changeset
   969
Chengsong
parents:
diff changeset
   970
  //testing the two lexings produce the same value
Chengsong
parents:
diff changeset
   971
  //enumerates strings of length n over alphabet cs
Chengsong
parents:
diff changeset
   972
  def strs(n: Int, cs: String) : Set[String] = {
Chengsong
parents:
diff changeset
   973
    if (n == 0) Set("")
Chengsong
parents:
diff changeset
   974
    else {
Chengsong
parents:
diff changeset
   975
      val ss = strs(n - 1, cs)
Chengsong
parents:
diff changeset
   976
      ss ++
Chengsong
parents:
diff changeset
   977
      (for (s <- ss; c <- cs.toList) yield c + s)
Chengsong
parents:
diff changeset
   978
    }
Chengsong
parents:
diff changeset
   979
  }
Chengsong
parents:
diff changeset
   980
  def enum(n: Int, s: String) : Stream[Rexp] = n match {
Chengsong
parents:
diff changeset
   981
    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
Chengsong
parents:
diff changeset
   982
    case n => {  
Chengsong
parents:
diff changeset
   983
      val rs = enum(n - 1, s)
Chengsong
parents:
diff changeset
   984
      rs #:::
Chengsong
parents:
diff changeset
   985
      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
Chengsong
parents:
diff changeset
   986
      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
Chengsong
parents:
diff changeset
   987
      (for (r1 <- rs) yield STAR(r1))
Chengsong
parents:
diff changeset
   988
    }
Chengsong
parents:
diff changeset
   989
  }
Chengsong
parents:
diff changeset
   990
Chengsong
parents:
diff changeset
   991
  //tests blexing and lexing
Chengsong
parents:
diff changeset
   992
  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
Chengsong
parents:
diff changeset
   993
    clear()
Chengsong
parents:
diff changeset
   994
    //println(s"Testing ${r}")
Chengsong
parents:
diff changeset
   995
    for (s <- ss.par) yield {
Chengsong
parents:
diff changeset
   996
      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   997
      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
0
Chengsong
parents:
diff changeset
   998
      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
Chengsong
parents:
diff changeset
   999
      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
Chengsong
parents:
diff changeset
  1000
      if (res1 != res2) Some((r, s)) else None
Chengsong
parents:
diff changeset
  1001
    }
Chengsong
parents:
diff changeset
  1002
  }
Chengsong
parents:
diff changeset
  1003
Chengsong
parents:
diff changeset
  1004
Chengsong
parents:
diff changeset
  1005
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
  1006
  
0
Chengsong
parents:
diff changeset
  1007
  /*
Chengsong
parents:
diff changeset
  1008
  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
Chengsong
parents:
diff changeset
  1009
    for (s <- ss){
Chengsong
parents:
diff changeset
  1010
      
Chengsong
parents:
diff changeset
  1011
      val der_res = bder(c, ar) 
Chengsong
parents:
diff changeset
  1012
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
  1013
      println(asize(der_res))
Chengsong
parents:
diff changeset
  1014
      println(asize(simp_res))
Chengsong
parents:
diff changeset
  1015
      single_expression_explorer(simp_res, (sc - c))
Chengsong
parents:
diff changeset
  1016
    }
Chengsong
parents:
diff changeset
  1017
  }*/
Chengsong
parents:
diff changeset
  1018
Chengsong
parents:
diff changeset
  1019
  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
Chengsong
parents:
diff changeset
  1020
Chengsong
parents:
diff changeset
  1021
Chengsong
parents:
diff changeset
  1022
}
Chengsong
parents:
diff changeset
  1023
Chengsong
parents:
diff changeset
  1024
import Rexp.Bits
Chengsong
parents:
diff changeset
  1025
abstract class ARexp 
Chengsong
parents:
diff changeset
  1026
case object AZERO extends ARexp
Chengsong
parents:
diff changeset
  1027
case class AONE(bs: Bits) extends ARexp
Chengsong
parents:
diff changeset
  1028
case class ACHAR(bs: Bits, f: Char) extends ARexp
Chengsong
parents:
diff changeset
  1029
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
Chengsong
parents:
diff changeset
  1030
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
  1031
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
  1032
Chengsong
parents:
diff changeset
  1033
Chengsong
parents:
diff changeset
  1034
Chengsong
parents:
diff changeset
  1035
abstract class Val
Chengsong
parents:
diff changeset
  1036
case object Empty extends Val
Chengsong
parents:
diff changeset
  1037
case class Chr(c: Char) extends Val
Chengsong
parents:
diff changeset
  1038
case class Sequ(v1: Val, v2: Val) extends Val
Chengsong
parents:
diff changeset
  1039
case class Left(v: Val) extends Val
Chengsong
parents:
diff changeset
  1040
case class Right(v: Val) extends Val
Chengsong
parents:
diff changeset
  1041
case class Stars(vs: List[Val]) extends Val
Chengsong
parents:
diff changeset
  1042
case class Rec(x: String, v: Val) extends Val
17
Chengsong
parents: 16
diff changeset
  1043
case object undefined extends Val
0
Chengsong
parents:
diff changeset
  1044
//case class Pos(i: Int, v: Val) extends Val
Chengsong
parents:
diff changeset
  1045
case object Prd extends Val