lex_blex_Frankensteined.scala
author Chengsong
Fri, 15 Mar 2019 10:46:46 +0000
changeset 3 f15dccc42c7b
parent 0 902326e1615a
child 5 622ddbb1223a
permissions -rw-r--r--
removing PRED why should it still be there after so many rounds of experiments? It should have been removed long ago since its existence will always make the compiler report errors. mysterious.:
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
Chengsong
parents:
diff changeset
     1
package RexpRelated
Chengsong
parents:
diff changeset
     2
import scala.language.implicitConversions    
Chengsong
parents:
diff changeset
     3
import scala.language.reflectiveCalls
Chengsong
parents:
diff changeset
     4
import scala.annotation.tailrec   
Chengsong
parents:
diff changeset
     5
import scala.util.Try
Chengsong
parents:
diff changeset
     6
Chengsong
parents:
diff changeset
     7
abstract class Bit
Chengsong
parents:
diff changeset
     8
case object Z extends Bit
Chengsong
parents:
diff changeset
     9
case object S extends Bit
Chengsong
parents:
diff changeset
    10
case class C(c: Char) extends Bit
Chengsong
parents:
diff changeset
    11
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
abstract class Rexp 
Chengsong
parents:
diff changeset
    14
case object ZERO extends Rexp
Chengsong
parents:
diff changeset
    15
case object ONE extends Rexp
Chengsong
parents:
diff changeset
    16
case class CHAR(c: Char) extends Rexp
Chengsong
parents:
diff changeset
    17
case class ALTS(rs: List[Rexp]) extends Rexp 
Chengsong
parents:
diff changeset
    18
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    19
case class STAR(r: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    20
case class RECD(x: String, r: Rexp) extends Rexp
Chengsong
parents:
diff changeset
    21
Chengsong
parents:
diff changeset
    22
Chengsong
parents:
diff changeset
    23
Chengsong
parents:
diff changeset
    24
object Rexp{
Chengsong
parents:
diff changeset
    25
  type Bits = List[Bit]
Chengsong
parents:
diff changeset
    26
  // abbreviations
Chengsong
parents:
diff changeset
    27
  type Mon = (Char, Rexp)
Chengsong
parents:
diff changeset
    28
  type Lin = Set[Mon]
Chengsong
parents:
diff changeset
    29
  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
Chengsong
parents:
diff changeset
    30
  def PLUS(r: Rexp) = SEQ(r, STAR(r))
Chengsong
parents:
diff changeset
    31
  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
Chengsong
parents:
diff changeset
    32
Chengsong
parents:
diff changeset
    33
Chengsong
parents:
diff changeset
    34
  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
Chengsong
parents:
diff changeset
    35
    case Nil => Nil
Chengsong
parents:
diff changeset
    36
    case (x::xs) => {
Chengsong
parents:
diff changeset
    37
      val res = f(x)
Chengsong
parents:
diff changeset
    38
      if (acc.contains(res)) distinctBy(xs, f, acc)  
Chengsong
parents:
diff changeset
    39
      else x::distinctBy(xs, f, res::acc)
Chengsong
parents:
diff changeset
    40
    }
Chengsong
parents:
diff changeset
    41
  } 
Chengsong
parents:
diff changeset
    42
  // some convenience for typing in regular expressions
Chengsong
parents:
diff changeset
    43
  def charlist2rexp(s : List[Char]): Rexp = s match {
Chengsong
parents:
diff changeset
    44
    case Nil => ONE
Chengsong
parents:
diff changeset
    45
    case c::Nil => CHAR(c)
Chengsong
parents:
diff changeset
    46
    case c::s => SEQ(CHAR(c), charlist2rexp(s))
Chengsong
parents:
diff changeset
    47
  }
Chengsong
parents:
diff changeset
    48
  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
Chengsong
parents:
diff changeset
    49
Chengsong
parents:
diff changeset
    50
  implicit def RexpOps(r: Rexp) = new {
Chengsong
parents:
diff changeset
    51
    def | (s: Rexp) = ALT(r, s)
Chengsong
parents:
diff changeset
    52
    def % = STAR(r)
Chengsong
parents:
diff changeset
    53
    def ~ (s: Rexp) = SEQ(r, s)
Chengsong
parents:
diff changeset
    54
  }
Chengsong
parents:
diff changeset
    55
Chengsong
parents:
diff changeset
    56
  implicit def stringOps(s: String) = new {
Chengsong
parents:
diff changeset
    57
    def | (r: Rexp) = ALT(s, r)
Chengsong
parents:
diff changeset
    58
    def | (r: String) = ALT(s, r)
Chengsong
parents:
diff changeset
    59
    def % = STAR(s)
Chengsong
parents:
diff changeset
    60
    def ~ (r: Rexp) = SEQ(s, r)
Chengsong
parents:
diff changeset
    61
    def ~ (r: String) = SEQ(s, r)
Chengsong
parents:
diff changeset
    62
    def $ (r: Rexp) = RECD(s, r)
Chengsong
parents:
diff changeset
    63
  }
Chengsong
parents:
diff changeset
    64
Chengsong
parents:
diff changeset
    65
  // translation into ARexps
Chengsong
parents:
diff changeset
    66
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    67
    case AZERO => AZERO
Chengsong
parents:
diff changeset
    68
    case AONE(cs) => AONE(bs ++ cs)
Chengsong
parents:
diff changeset
    69
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
Chengsong
parents:
diff changeset
    70
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
Chengsong
parents:
diff changeset
    71
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
Chengsong
parents:
diff changeset
    72
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
Chengsong
parents:
diff changeset
    73
  }
Chengsong
parents:
diff changeset
    74
Chengsong
parents:
diff changeset
    75
  def internalise(r: Rexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    76
    case ZERO => AZERO
Chengsong
parents:
diff changeset
    77
    case ONE => AONE(Nil)
Chengsong
parents:
diff changeset
    78
    case CHAR(c) => ACHAR(Nil, c)
Chengsong
parents:
diff changeset
    79
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
    80
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
Chengsong
parents:
diff changeset
    81
    case ALTS(r1::rs) => {
Chengsong
parents:
diff changeset
    82
      val AALTS(Nil, rs2) = internalise(ALTS(rs))
Chengsong
parents:
diff changeset
    83
      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
Chengsong
parents:
diff changeset
    84
    }
Chengsong
parents:
diff changeset
    85
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
Chengsong
parents:
diff changeset
    86
    case STAR(r) => ASTAR(Nil, internalise(r))
Chengsong
parents:
diff changeset
    87
    case RECD(x, r) => internalise(r)
Chengsong
parents:
diff changeset
    88
  }
Chengsong
parents:
diff changeset
    89
Chengsong
parents:
diff changeset
    90
  internalise(("a" | "ab") ~ ("b" | ""))
3
f15dccc42c7b removing PRED
Chengsong
parents: 0
diff changeset
    91
/*
0
Chengsong
parents:
diff changeset
    92
  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
Chengsong
parents:
diff changeset
    93
    case (ONE, bs) => (Empty, bs)
Chengsong
parents:
diff changeset
    94
    case (PRED(f), C(c)::bs) => (Chr(c), bs)
Chengsong
parents:
diff changeset
    95
    case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
Chengsong
parents:
diff changeset
    96
    case (ALTS(rs), bs) => bs match {
Chengsong
parents:
diff changeset
    97
      case Z::bs1 => {
Chengsong
parents:
diff changeset
    98
        val (v, bs2) = decode_aux(rs.head, bs1)
Chengsong
parents:
diff changeset
    99
        (Left(v), bs2)
Chengsong
parents:
diff changeset
   100
      }
Chengsong
parents:
diff changeset
   101
      case S::bs1 => {
Chengsong
parents:
diff changeset
   102
        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
Chengsong
parents:
diff changeset
   103
        (Right(v), bs2)			
Chengsong
parents:
diff changeset
   104
      }
Chengsong
parents:
diff changeset
   105
    }
Chengsong
parents:
diff changeset
   106
    case (SEQ(r1, r2), bs) => {
Chengsong
parents:
diff changeset
   107
      val (v1, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   108
      val (v2, bs2) = decode_aux(r2, bs1)
Chengsong
parents:
diff changeset
   109
      (Sequ(v1, v2), bs2)
Chengsong
parents:
diff changeset
   110
    }
Chengsong
parents:
diff changeset
   111
    case (STAR(r1), S::bs) => {
Chengsong
parents:
diff changeset
   112
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   113
      //println(v)
Chengsong
parents:
diff changeset
   114
      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
Chengsong
parents:
diff changeset
   115
      (Stars(v::vs), bs2)
Chengsong
parents:
diff changeset
   116
    }
Chengsong
parents:
diff changeset
   117
    case (STAR(_), Z::bs) => (Stars(Nil), bs)
Chengsong
parents:
diff changeset
   118
    case (RECD(x, r1), bs) => {
Chengsong
parents:
diff changeset
   119
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   120
      (Rec(x, v), bs1)
Chengsong
parents:
diff changeset
   121
    }
Chengsong
parents:
diff changeset
   122
  }
Chengsong
parents:
diff changeset
   123
Chengsong
parents:
diff changeset
   124
  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
Chengsong
parents:
diff changeset
   125
    case (v, Nil) => v
Chengsong
parents:
diff changeset
   126
    case _ => throw new Exception("Not decodable")
Chengsong
parents:
diff changeset
   127
  }
3
f15dccc42c7b removing PRED
Chengsong
parents: 0
diff changeset
   128
*/
0
Chengsong
parents:
diff changeset
   129
Chengsong
parents:
diff changeset
   130
  //erase function: extracts the regx from Aregex
Chengsong
parents:
diff changeset
   131
  def erase(r:ARexp): Rexp = r match{
Chengsong
parents:
diff changeset
   132
    case AZERO => ZERO
Chengsong
parents:
diff changeset
   133
    case AONE(_) => ONE
Chengsong
parents:
diff changeset
   134
    case ACHAR(bs, f) => CHAR(f)
Chengsong
parents:
diff changeset
   135
    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
Chengsong
parents:
diff changeset
   136
    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
Chengsong
parents:
diff changeset
   137
    case ASTAR(cs, r)=> STAR(erase(r))
Chengsong
parents:
diff changeset
   138
  }
Chengsong
parents:
diff changeset
   139
Chengsong
parents:
diff changeset
   140
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   141
  // nullable function: tests whether the regular 
Chengsong
parents:
diff changeset
   142
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   143
  def nullable (r: Rexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   144
    case ZERO => false
Chengsong
parents:
diff changeset
   145
    case ONE => true
Chengsong
parents:
diff changeset
   146
    case CHAR(_) => false
Chengsong
parents:
diff changeset
   147
    case ALTS(rs) => rs.exists(nullable)
Chengsong
parents:
diff changeset
   148
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
Chengsong
parents:
diff changeset
   149
    case STAR(_) => true
Chengsong
parents:
diff changeset
   150
    case RECD(_, r) => nullable(r)
Chengsong
parents:
diff changeset
   151
    //case PLUS(r) => nullable(r)
Chengsong
parents:
diff changeset
   152
  }
Chengsong
parents:
diff changeset
   153
Chengsong
parents:
diff changeset
   154
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   155
  def der (c: Char, r: Rexp) : Rexp = r match {
Chengsong
parents:
diff changeset
   156
    case ZERO => ZERO
Chengsong
parents:
diff changeset
   157
    case ONE => ZERO
Chengsong
parents:
diff changeset
   158
    case CHAR(f) => if (c == f) ONE else ZERO
Chengsong
parents:
diff changeset
   159
    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
Chengsong
parents:
diff changeset
   160
    case SEQ(r1, r2) => 
Chengsong
parents:
diff changeset
   161
      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
Chengsong
parents:
diff changeset
   162
      else SEQ(der(c, r1), r2)
Chengsong
parents:
diff changeset
   163
    case STAR(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   164
    case RECD(_, r1) => der(c, r1)
Chengsong
parents:
diff changeset
   165
    //case PLUS(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   166
  }
Chengsong
parents:
diff changeset
   167
Chengsong
parents:
diff changeset
   168
  def flatten(v: Val) : String = v match {
Chengsong
parents:
diff changeset
   169
    case Empty => ""
Chengsong
parents:
diff changeset
   170
    case Chr(c) => c.toString
Chengsong
parents:
diff changeset
   171
    case Left(v) => flatten(v)
Chengsong
parents:
diff changeset
   172
    case Right(v) => flatten(v)
Chengsong
parents:
diff changeset
   173
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
Chengsong
parents:
diff changeset
   174
    case Stars(vs) => vs.map(flatten).mkString
Chengsong
parents:
diff changeset
   175
    case Rec(_, v) => flatten(v)
Chengsong
parents:
diff changeset
   176
  }
Chengsong
parents:
diff changeset
   177
Chengsong
parents:
diff changeset
   178
  // extracts an environment from a value
Chengsong
parents:
diff changeset
   179
  def env(v: Val) : List[(String, String)] = v match {
Chengsong
parents:
diff changeset
   180
    case Empty => Nil
Chengsong
parents:
diff changeset
   181
    case Chr(c) => Nil
Chengsong
parents:
diff changeset
   182
    case Left(v) => env(v)
Chengsong
parents:
diff changeset
   183
    case Right(v) => env(v)
Chengsong
parents:
diff changeset
   184
    case Sequ(v1, v2) => env(v1) ::: env(v2)
Chengsong
parents:
diff changeset
   185
    case Stars(vs) => vs.flatMap(env)
Chengsong
parents:
diff changeset
   186
    case Rec(x, v) => (x, flatten(v))::env(v)
Chengsong
parents:
diff changeset
   187
  }
Chengsong
parents:
diff changeset
   188
Chengsong
parents:
diff changeset
   189
Chengsong
parents:
diff changeset
   190
  // injection part
Chengsong
parents:
diff changeset
   191
  def mkeps(r: Rexp) : Val = r match {
Chengsong
parents:
diff changeset
   192
    case ONE => Empty
Chengsong
parents:
diff changeset
   193
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
   194
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
Chengsong
parents:
diff changeset
   195
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
Chengsong
parents:
diff changeset
   196
    case STAR(r) => Stars(Nil)
Chengsong
parents:
diff changeset
   197
    case RECD(x, r) => Rec(x, mkeps(r))
Chengsong
parents:
diff changeset
   198
    //case PLUS(r) => Stars(List(mkeps(r)))
Chengsong
parents:
diff changeset
   199
  }
Chengsong
parents:
diff changeset
   200
Chengsong
parents:
diff changeset
   201
  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
Chengsong
parents:
diff changeset
   202
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   203
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   204
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   205
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
Chengsong
parents:
diff changeset
   206
    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
Chengsong
parents:
diff changeset
   207
    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
Chengsong
parents:
diff changeset
   208
    case (CHAR(_), Empty) => Chr(c) 
Chengsong
parents:
diff changeset
   209
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
Chengsong
parents:
diff changeset
   210
    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   211
  }
Chengsong
parents:
diff changeset
   212
  def lex(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   213
    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   214
    case c::cs => inj(r, c, lex(der(c, r), cs))
Chengsong
parents:
diff changeset
   215
  }
Chengsong
parents:
diff changeset
   216
Chengsong
parents:
diff changeset
   217
  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
Chengsong
parents:
diff changeset
   218
Chengsong
parents:
diff changeset
   219
  // some "rectification" functions for simplification
Chengsong
parents:
diff changeset
   220
  def F_ID(v: Val): Val = v
Chengsong
parents:
diff changeset
   221
  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
Chengsong
parents:
diff changeset
   222
  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
Chengsong
parents:
diff changeset
   223
  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   224
    case Right(v) => Right(f2(v))
Chengsong
parents:
diff changeset
   225
    case Left(v) => Left(f1(v))
Chengsong
parents:
diff changeset
   226
  }
Chengsong
parents:
diff changeset
   227
  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   228
    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
Chengsong
parents:
diff changeset
   229
  }
Chengsong
parents:
diff changeset
   230
  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   231
    (v:Val) => Sequ(f1(Empty), f2(v))
Chengsong
parents:
diff changeset
   232
  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   233
    (v:Val) => Sequ(f1(v), f2(Empty))
Chengsong
parents:
diff changeset
   234
  def F_RECD(f: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   235
    case Rec(x, v) => Rec(x, f(v))
Chengsong
parents:
diff changeset
   236
  }
Chengsong
parents:
diff changeset
   237
  def F_ERROR(v: Val): Val = throw new Exception("error")
Chengsong
parents:
diff changeset
   238
Chengsong
parents:
diff changeset
   239
  // simplification of regular expressions returning also an
Chengsong
parents:
diff changeset
   240
  // rectification function; no simplification under STAR 
Chengsong
parents:
diff changeset
   241
  def simp(r: Rexp): (Rexp, Val => Val) = r match {
Chengsong
parents:
diff changeset
   242
    case ALTS(List(r1, r2)) => {
Chengsong
parents:
diff changeset
   243
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   244
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   245
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   246
        case (ZERO, _) => (r2s, F_RIGHT(f2s))
Chengsong
parents:
diff changeset
   247
        case (_, ZERO) => (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   248
        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   249
                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
Chengsong
parents:
diff changeset
   250
      }
Chengsong
parents:
diff changeset
   251
    }
Chengsong
parents:
diff changeset
   252
    case SEQ(r1, r2) => {
Chengsong
parents:
diff changeset
   253
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   254
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   255
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   256
        case (ZERO, _) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   257
        case (_, ZERO) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   258
        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
Chengsong
parents:
diff changeset
   259
        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
Chengsong
parents:
diff changeset
   260
        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
Chengsong
parents:
diff changeset
   261
      }
Chengsong
parents:
diff changeset
   262
    }
Chengsong
parents:
diff changeset
   263
    case RECD(x, r1) => {
Chengsong
parents:
diff changeset
   264
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   265
      (RECD(x, r1s), F_RECD(f1s))
Chengsong
parents:
diff changeset
   266
    }
Chengsong
parents:
diff changeset
   267
    case r => (r, F_ID)
Chengsong
parents:
diff changeset
   268
  }
Chengsong
parents:
diff changeset
   269
  /*
Chengsong
parents:
diff changeset
   270
  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   271
  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   272
  */
Chengsong
parents:
diff changeset
   273
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   274
    case Nil => {
Chengsong
parents:
diff changeset
   275
      if (nullable(r)) {
Chengsong
parents:
diff changeset
   276
        mkeps(r) 
Chengsong
parents:
diff changeset
   277
      }
Chengsong
parents:
diff changeset
   278
      else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   279
    }
Chengsong
parents:
diff changeset
   280
    case c::cs => {
Chengsong
parents:
diff changeset
   281
      val (r_simp, f_simp) = simp(der(c, r))
Chengsong
parents:
diff changeset
   282
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
Chengsong
parents:
diff changeset
   283
    }
Chengsong
parents:
diff changeset
   284
  }
Chengsong
parents:
diff changeset
   285
Chengsong
parents:
diff changeset
   286
  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
Chengsong
parents:
diff changeset
   287
Chengsong
parents:
diff changeset
   288
  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
Chengsong
parents:
diff changeset
   289
Chengsong
parents:
diff changeset
   290
  // filters out all white spaces
Chengsong
parents:
diff changeset
   291
  def tokenise(r: Rexp, s: String) = 
Chengsong
parents:
diff changeset
   292
    env(lexing_simp(r, s)).filterNot { _._1 == "w"}
Chengsong
parents:
diff changeset
   293
Chengsong
parents:
diff changeset
   294
Chengsong
parents:
diff changeset
   295
  //reads the string from a file 
Chengsong
parents:
diff changeset
   296
  def fromFile(name: String) : String = 
Chengsong
parents:
diff changeset
   297
    io.Source.fromFile(name).mkString
Chengsong
parents:
diff changeset
   298
Chengsong
parents:
diff changeset
   299
  def tokenise_file(r: Rexp, name: String) = 
Chengsong
parents:
diff changeset
   300
    tokenise(r, fromFile(name))
Chengsong
parents:
diff changeset
   301
  
Chengsong
parents:
diff changeset
   302
  //   Testing
Chengsong
parents:
diff changeset
   303
  //============
Chengsong
parents:
diff changeset
   304
Chengsong
parents:
diff changeset
   305
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   306
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   307
    val result = code
Chengsong
parents:
diff changeset
   308
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   309
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   310
    result
Chengsong
parents:
diff changeset
   311
  }
Chengsong
parents:
diff changeset
   312
Chengsong
parents:
diff changeset
   313
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   314
Chengsong
parents:
diff changeset
   315
  // bnullable function: tests whether the aregular 
Chengsong
parents:
diff changeset
   316
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   317
  def bnullable (r: ARexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   318
    case AZERO => false
Chengsong
parents:
diff changeset
   319
    case AONE(_) => true
Chengsong
parents:
diff changeset
   320
    case ACHAR(_,_) => false
Chengsong
parents:
diff changeset
   321
    case AALTS(_, rs) => rs.exists(bnullable)
Chengsong
parents:
diff changeset
   322
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
Chengsong
parents:
diff changeset
   323
    case ASTAR(_, _) => true
Chengsong
parents:
diff changeset
   324
  }
Chengsong
parents:
diff changeset
   325
Chengsong
parents:
diff changeset
   326
  def mkepsBC(r: ARexp) : Bits = r match {
Chengsong
parents:
diff changeset
   327
    case AONE(bs) => bs
Chengsong
parents:
diff changeset
   328
    case AALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   329
      val n = rs.indexWhere(bnullable)
Chengsong
parents:
diff changeset
   330
      bs ++ mkepsBC(rs(n))
Chengsong
parents:
diff changeset
   331
    }
Chengsong
parents:
diff changeset
   332
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
Chengsong
parents:
diff changeset
   333
    case ASTAR(bs, r) => bs ++ List(Z)
Chengsong
parents:
diff changeset
   334
  }
Chengsong
parents:
diff changeset
   335
Chengsong
parents:
diff changeset
   336
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   337
  def bder(c: Char, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
   338
    case AZERO => AZERO
Chengsong
parents:
diff changeset
   339
    case AONE(_) => AZERO
Chengsong
parents:
diff changeset
   340
    case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
Chengsong
parents:
diff changeset
   341
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
Chengsong
parents:
diff changeset
   342
    case ASEQ(bs, r1, r2) => 
Chengsong
parents:
diff changeset
   343
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
Chengsong
parents:
diff changeset
   344
      else ASEQ(bs, bder(c, r1), r2)
Chengsong
parents:
diff changeset
   345
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
Chengsong
parents:
diff changeset
   346
  }
Chengsong
parents:
diff changeset
   347
Chengsong
parents:
diff changeset
   348
Chengsong
parents:
diff changeset
   349
  def ders (s: List[Char], r: Rexp) : Rexp = s match {
Chengsong
parents:
diff changeset
   350
    case Nil => r
Chengsong
parents:
diff changeset
   351
    case c::s => ders(s, der(c, r))
Chengsong
parents:
diff changeset
   352
  }
Chengsong
parents:
diff changeset
   353
Chengsong
parents:
diff changeset
   354
  // derivative w.r.t. a string (iterates bder)
Chengsong
parents:
diff changeset
   355
  @tailrec
Chengsong
parents:
diff changeset
   356
  def bders (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   357
    case Nil => r
Chengsong
parents:
diff changeset
   358
    case c::s => bders(s, bder(c, r))
Chengsong
parents:
diff changeset
   359
  }
Chengsong
parents:
diff changeset
   360
Chengsong
parents:
diff changeset
   361
  def flats(rs: List[ARexp]): List[ARexp] = rs match {
Chengsong
parents:
diff changeset
   362
      case Nil => Nil
Chengsong
parents:
diff changeset
   363
      case AZERO :: rs1 => flats(rs1)
Chengsong
parents:
diff changeset
   364
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
Chengsong
parents:
diff changeset
   365
      case r1 :: rs2 => r1 :: flats(rs2)
Chengsong
parents:
diff changeset
   366
    }
Chengsong
parents:
diff changeset
   367
  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
Chengsong
parents:
diff changeset
   368
    case Nil => Nil
Chengsong
parents:
diff changeset
   369
    case ZERO :: rs1 => rflats(rs1)
Chengsong
parents:
diff changeset
   370
    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
Chengsong
parents:
diff changeset
   371
    case r1 :: rs2 => r1 :: rflats(rs2)
Chengsong
parents:
diff changeset
   372
  }
Chengsong
parents:
diff changeset
   373
  //val flats_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   374
  //val dist_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   375
  var flats_time = 0L
Chengsong
parents:
diff changeset
   376
  var dist_time = 0L
Chengsong
parents:
diff changeset
   377
  /*
Chengsong
parents:
diff changeset
   378
  def bsimp(r: ARexp, depth: Int): ARexp = 
Chengsong
parents:
diff changeset
   379
  {
Chengsong
parents:
diff changeset
   380
    r match {
Chengsong
parents:
diff changeset
   381
      case ASEQ(bs1, r1, r2) => (bsimp(r1, depth), bsimp(r2, depth)) match {
Chengsong
parents:
diff changeset
   382
          case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   383
          case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   384
          case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   385
          case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   386
      }
Chengsong
parents:
diff changeset
   387
      case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   388
        depth match {
Chengsong
parents:
diff changeset
   389
          case 0 => {
Chengsong
parents:
diff changeset
   390
            flats(distinctBy(rs, erase)) match {
Chengsong
parents:
diff changeset
   391
              case Nil => AZERO
Chengsong
parents:
diff changeset
   392
              case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   393
              case rs => AALTS(bs1, rs) 
Chengsong
parents:
diff changeset
   394
            } 
Chengsong
parents:
diff changeset
   395
          }
Chengsong
parents:
diff changeset
   396
          case n => {
Chengsong
parents:
diff changeset
   397
            val rs_simp = rs.map(bsimp(_, n - 1))
Chengsong
parents:
diff changeset
   398
            val time2 = System.nanoTime()
Chengsong
parents:
diff changeset
   399
            val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   400
            val time3 = System.nanoTime()
Chengsong
parents:
diff changeset
   401
            val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   402
            val time4 = System.nanoTime()
Chengsong
parents:
diff changeset
   403
            flats_time = flats_time + time3 - time2
Chengsong
parents:
diff changeset
   404
            dist_time = dist_time + time4 - time3
Chengsong
parents:
diff changeset
   405
            //flats_time += time3 - time2
Chengsong
parents:
diff changeset
   406
            //dist_time += time4 - time3
Chengsong
parents:
diff changeset
   407
            //distinctBy(flats(rs.map(bsimp)), erase) match {
Chengsong
parents:
diff changeset
   408
            dist_res match {
Chengsong
parents:
diff changeset
   409
              case Nil => AZERO
Chengsong
parents:
diff changeset
   410
              case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   411
              case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   412
            }
Chengsong
parents:
diff changeset
   413
          }
Chengsong
parents:
diff changeset
   414
        }
Chengsong
parents:
diff changeset
   415
      }
Chengsong
parents:
diff changeset
   416
      case r => r
Chengsong
parents:
diff changeset
   417
    }
Chengsong
parents:
diff changeset
   418
  }
Chengsong
parents:
diff changeset
   419
  */
Chengsong
parents:
diff changeset
   420
  //----------------------------------------------------------------------------This bsimp is the original slow one
Chengsong
parents:
diff changeset
   421
  
Chengsong
parents:
diff changeset
   422
  def bsimp(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
   423
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
Chengsong
parents:
diff changeset
   424
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   425
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   426
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   427
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   428
    }
Chengsong
parents:
diff changeset
   429
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   430
      val rs_simp = rs.map(bsimp)
Chengsong
parents:
diff changeset
   431
      val time2 = System.nanoTime()
Chengsong
parents:
diff changeset
   432
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   433
      val time3 = System.nanoTime()
Chengsong
parents:
diff changeset
   434
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   435
      val time4 = System.nanoTime()
Chengsong
parents:
diff changeset
   436
      flats_time = flats_time + time3 - time2
Chengsong
parents:
diff changeset
   437
      dist_time = dist_time + time4 - time3
Chengsong
parents:
diff changeset
   438
      dist_res match {
Chengsong
parents:
diff changeset
   439
        case Nil => AZERO
Chengsong
parents:
diff changeset
   440
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   441
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   442
      }
Chengsong
parents:
diff changeset
   443
    }
Chengsong
parents:
diff changeset
   444
    case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
Chengsong
parents:
diff changeset
   445
    case r => r
Chengsong
parents:
diff changeset
   446
  }
Chengsong
parents:
diff changeset
   447
Chengsong
parents:
diff changeset
   448
  def bsimp_weakened(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
   449
    case ASEQ(bs1, r1, r2) => (bsimp_weakened(r1), r2) match {
Chengsong
parents:
diff changeset
   450
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   451
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   452
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   453
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   454
    }
Chengsong
parents:
diff changeset
   455
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   456
      val rs_simp = rs.map(bsimp_weakened)
Chengsong
parents:
diff changeset
   457
      val time2 = System.nanoTime()
Chengsong
parents:
diff changeset
   458
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   459
      val time3 = System.nanoTime()
Chengsong
parents:
diff changeset
   460
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   461
      val time4 = System.nanoTime()
Chengsong
parents:
diff changeset
   462
      flats_time = flats_time + time3 - time2
Chengsong
parents:
diff changeset
   463
      dist_time = dist_time + time4 - time3
Chengsong
parents:
diff changeset
   464
      dist_res match {
Chengsong
parents:
diff changeset
   465
        case Nil => AZERO
Chengsong
parents:
diff changeset
   466
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   467
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   468
      }
Chengsong
parents:
diff changeset
   469
    }
Chengsong
parents:
diff changeset
   470
    case ASTAR(bs, r) => ASTAR(bs, bsimp_weakened(r))
Chengsong
parents:
diff changeset
   471
    case r => r
Chengsong
parents:
diff changeset
   472
  }
Chengsong
parents:
diff changeset
   473
Chengsong
parents:
diff changeset
   474
  def simp_weakened(r: Rexp): Rexp = r match {
Chengsong
parents:
diff changeset
   475
    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
Chengsong
parents:
diff changeset
   476
        case (ZERO, _) => ZERO
Chengsong
parents:
diff changeset
   477
        case (_, ZERO) => ZERO
Chengsong
parents:
diff changeset
   478
        case (ONE, r2s) => r2s
Chengsong
parents:
diff changeset
   479
        case (r1s, r2s) => SEQ(r1s, r2s)
Chengsong
parents:
diff changeset
   480
    }
Chengsong
parents:
diff changeset
   481
    case ALTS(rs) => {
Chengsong
parents:
diff changeset
   482
      val rs_simp = rs.map(simp_weakened)
Chengsong
parents:
diff changeset
   483
      val flat_res = rflats(rs_simp)
Chengsong
parents:
diff changeset
   484
      val dist_res = rs_simp.distinct
Chengsong
parents:
diff changeset
   485
      dist_res match {
Chengsong
parents:
diff changeset
   486
        case Nil => ZERO
Chengsong
parents:
diff changeset
   487
        case s :: Nil => s
Chengsong
parents:
diff changeset
   488
        case rs => ALTS(rs)  
Chengsong
parents:
diff changeset
   489
      }
Chengsong
parents:
diff changeset
   490
    }
Chengsong
parents:
diff changeset
   491
    case STAR(r) => STAR(simp_weakened(r))
Chengsong
parents:
diff changeset
   492
    case r => r
Chengsong
parents:
diff changeset
   493
  }
Chengsong
parents:
diff changeset
   494
    
Chengsong
parents:
diff changeset
   495
  
Chengsong
parents:
diff changeset
   496
  //----------------------------------------------------------------------------experiment bsimp
Chengsong
parents:
diff changeset
   497
  /*
Chengsong
parents:
diff changeset
   498
  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   499
    case Nil => r
Chengsong
parents:
diff changeset
   500
    case c::s => bders_simp(s, bsimp(bder(c, r)))
Chengsong
parents:
diff changeset
   501
  }
Chengsong
parents:
diff changeset
   502
  */
Chengsong
parents:
diff changeset
   503
  /*
Chengsong
parents:
diff changeset
   504
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   505
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   506
    val result = code
Chengsong
parents:
diff changeset
   507
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   508
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   509
    result
Chengsong
parents:
diff changeset
   510
  }
Chengsong
parents:
diff changeset
   511
  */
Chengsong
parents:
diff changeset
   512
  // main unsimplified lexing function (produces a value)
Chengsong
parents:
diff changeset
   513
  def blex(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   514
    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   515
    case c::cs => {
Chengsong
parents:
diff changeset
   516
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   517
      blex(der_res, cs)
Chengsong
parents:
diff changeset
   518
    }
Chengsong
parents:
diff changeset
   519
  }
Chengsong
parents:
diff changeset
   520
Chengsong
parents:
diff changeset
   521
  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
Chengsong
parents:
diff changeset
   522
  //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
Chengsong
parents:
diff changeset
   523
Chengsong
parents:
diff changeset
   524
  var bder_time = 0L
Chengsong
parents:
diff changeset
   525
  var bsimp_time = 0L
Chengsong
parents:
diff changeset
   526
  var mkepsBC_time = 0L
Chengsong
parents:
diff changeset
   527
  var small_de = 2
Chengsong
parents:
diff changeset
   528
  var big_de = 5
Chengsong
parents:
diff changeset
   529
  var usual_de = 3
Chengsong
parents:
diff changeset
   530
  
Chengsong
parents:
diff changeset
   531
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   532
    case Nil => {
Chengsong
parents:
diff changeset
   533
      if (bnullable(r)) {
Chengsong
parents:
diff changeset
   534
        //println(asize(r))
Chengsong
parents:
diff changeset
   535
        val time4 = System.nanoTime()
Chengsong
parents:
diff changeset
   536
        val bits = mkepsBC(r)
Chengsong
parents:
diff changeset
   537
        val time5 = System.nanoTime()
Chengsong
parents:
diff changeset
   538
        mkepsBC_time = time5 - time4
Chengsong
parents:
diff changeset
   539
        bits
Chengsong
parents:
diff changeset
   540
      }
Chengsong
parents:
diff changeset
   541
    else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   542
    }
Chengsong
parents:
diff changeset
   543
    case c::cs => {
Chengsong
parents:
diff changeset
   544
      val time1 = System.nanoTime()
Chengsong
parents:
diff changeset
   545
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   546
      val time2 = System.nanoTime()
Chengsong
parents:
diff changeset
   547
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   548
      val time3 = System.nanoTime()  
Chengsong
parents:
diff changeset
   549
      bder_time = bder_time + time2 - time1
Chengsong
parents:
diff changeset
   550
      bsimp_time = bsimp_time + time3 - time2
Chengsong
parents:
diff changeset
   551
      blex_simp(simp_res, cs)      
Chengsong
parents:
diff changeset
   552
    }
Chengsong
parents:
diff changeset
   553
  }
Chengsong
parents:
diff changeset
   554
  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
Chengsong
parents:
diff changeset
   555
    case Nil => r
Chengsong
parents:
diff changeset
   556
    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
Chengsong
parents:
diff changeset
   557
  }
Chengsong
parents:
diff changeset
   558
Chengsong
parents:
diff changeset
   559
  //-------------------------------------------------------------------------------------tests whether simp(simp(r)) == simp(r) holds true
Chengsong
parents:
diff changeset
   560
  /*
Chengsong
parents:
diff changeset
   561
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   562
    case Nil => {
Chengsong
parents:
diff changeset
   563
      if (bnullable(r)) {
Chengsong
parents:
diff changeset
   564
        //println(asize(r))
Chengsong
parents:
diff changeset
   565
        mkepsBC(r)
Chengsong
parents:
diff changeset
   566
      }
Chengsong
parents:
diff changeset
   567
    else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   568
    }
Chengsong
parents:
diff changeset
   569
    case c::cs => {
Chengsong
parents:
diff changeset
   570
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   571
      val simp_res = bsimp(der_res)  
Chengsong
parents:
diff changeset
   572
      //val simp_res2 = bsimp(simp_res)  
Chengsong
parents:
diff changeset
   573
      //println("Size reduction from "+asize(der_res)+ " to " +asize(simp_res)+" to " + asize(simp_res2)) 
Chengsong
parents:
diff changeset
   574
      blex_simp(simp_res, cs)
Chengsong
parents:
diff changeset
   575
    }
Chengsong
parents:
diff changeset
   576
  }
Chengsong
parents:
diff changeset
   577
  */
Chengsong
parents:
diff changeset
   578
  /*
Chengsong
parents:
diff changeset
   579
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   580
    case Nil => {
Chengsong
parents:
diff changeset
   581
      if (nullable(r)) {
Chengsong
parents:
diff changeset
   582
        mkeps(r) 
Chengsong
parents:
diff changeset
   583
      }
Chengsong
parents:
diff changeset
   584
      else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   585
    }
Chengsong
parents:
diff changeset
   586
    case c::cs => {
Chengsong
parents:
diff changeset
   587
      val start = System.nanoTime()
Chengsong
parents:
diff changeset
   588
      val (r_simp, f_simp) = simp(der(c, r))
Chengsong
parents:
diff changeset
   589
      val end = System.nanoTime()
Chengsong
parents:
diff changeset
   590
      println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   591
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
Chengsong
parents:
diff changeset
   592
    }
Chengsong
parents:
diff changeset
   593
  }
Chengsong
parents:
diff changeset
   594
  */
Chengsong
parents:
diff changeset
   595
Chengsong
parents:
diff changeset
   596
  //size: of a Aregx for testing purposes 
Chengsong
parents:
diff changeset
   597
  def size(r: Rexp) : Int = r match {
Chengsong
parents:
diff changeset
   598
    case ZERO => 1
Chengsong
parents:
diff changeset
   599
    case ONE => 1
Chengsong
parents:
diff changeset
   600
    case CHAR(_) => 1
Chengsong
parents:
diff changeset
   601
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
Chengsong
parents:
diff changeset
   602
    case ALTS(rs) => 1 + rs.map(size).sum
Chengsong
parents:
diff changeset
   603
    case STAR(r) => 1 + size(r)
Chengsong
parents:
diff changeset
   604
  }
Chengsong
parents:
diff changeset
   605
Chengsong
parents:
diff changeset
   606
  def asize(a: ARexp) = size(erase(a))
Chengsong
parents:
diff changeset
   607
Chengsong
parents:
diff changeset
   608
Chengsong
parents:
diff changeset
   609
  // decoding does not work yet
Chengsong
parents:
diff changeset
   610
  def blexing_simp(r: Rexp, s: String) = {
Chengsong
parents:
diff changeset
   611
    //flats_time.clear()
Chengsong
parents:
diff changeset
   612
    //dist_time.clear()
Chengsong
parents:
diff changeset
   613
    flats_time = 0L
Chengsong
parents:
diff changeset
   614
    dist_time = 0L
Chengsong
parents:
diff changeset
   615
    bder_time = 0L
Chengsong
parents:
diff changeset
   616
    bsimp_time = 0L
Chengsong
parents:
diff changeset
   617
    mkepsBC_time = 0L
Chengsong
parents:
diff changeset
   618
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   619
    val bit_code = blex_simp(internalise(r), s.toList)
Chengsong
parents:
diff changeset
   620
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   621
    println("total time: "+ (end - start)/1.0e9)
Chengsong
parents:
diff changeset
   622
    println("spent on flats: " + (flats_time/(1.0e9)))
Chengsong
parents:
diff changeset
   623
    println("spent on distinctBy: " + (dist_time/(1.0e9)))
Chengsong
parents:
diff changeset
   624
    println("spent on bder: "+ bder_time/1.0e9)
Chengsong
parents:
diff changeset
   625
    println("spent on bsimp: " + bsimp_time/1.0e9)
Chengsong
parents:
diff changeset
   626
    println("spent on mkepsBC: " + mkepsBC_time/1.0e9)
Chengsong
parents:
diff changeset
   627
    //println(s"The length of the string ${s.length}; length of bit sequence:")
Chengsong
parents:
diff changeset
   628
    //println((bit_code.length))
Chengsong
parents:
diff changeset
   629
    //println(final_derivative)
Chengsong
parents:
diff changeset
   630
    //bit_code
Chengsong
parents:
diff changeset
   631
    //decode(r, bit_code) 
Chengsong
parents:
diff changeset
   632
  }
Chengsong
parents:
diff changeset
   633
Chengsong
parents:
diff changeset
   634
Chengsong
parents:
diff changeset
   635
Chengsong
parents:
diff changeset
   636
Chengsong
parents:
diff changeset
   637
Chengsong
parents:
diff changeset
   638
  // Lexing Rules for a Small While Language
Chengsong
parents:
diff changeset
   639
Chengsong
parents:
diff changeset
   640
  //symbols
Chengsong
parents:
diff changeset
   641
  /*
Chengsong
parents:
diff changeset
   642
  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
Chengsong
parents:
diff changeset
   643
  
Chengsong
parents:
diff changeset
   644
  //digits
Chengsong
parents:
diff changeset
   645
  val DIGIT = PRED("0123456789".contains(_))
Chengsong
parents:
diff changeset
   646
  //identifiers
Chengsong
parents:
diff changeset
   647
  val ID = SYM ~ (SYM | DIGIT).% 
Chengsong
parents:
diff changeset
   648
  //numbers
Chengsong
parents:
diff changeset
   649
  val NUM = STAR(DIGIT)
Chengsong
parents:
diff changeset
   650
  //keywords
Chengsong
parents:
diff changeset
   651
  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
Chengsong
parents:
diff changeset
   652
  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
Chengsong
parents:
diff changeset
   653
  //semicolons
Chengsong
parents:
diff changeset
   654
  val SEMI: Rexp = ";"
Chengsong
parents:
diff changeset
   655
  //operators
Chengsong
parents:
diff changeset
   656
  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
Chengsong
parents:
diff changeset
   657
  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
Chengsong
parents:
diff changeset
   658
  //whitespaces
Chengsong
parents:
diff changeset
   659
  val WHITESPACE = PLUS(" " | "\n" | "\t")
Chengsong
parents:
diff changeset
   660
  //parentheses
Chengsong
parents:
diff changeset
   661
  val RPAREN: Rexp = ")"
Chengsong
parents:
diff changeset
   662
  val LPAREN: Rexp = "("
Chengsong
parents:
diff changeset
   663
  val BEGIN: Rexp = "{"
Chengsong
parents:
diff changeset
   664
  val END: Rexp = "}"
Chengsong
parents:
diff changeset
   665
  //strings...but probably needs not
Chengsong
parents:
diff changeset
   666
  val STRING: Rexp = "\"" ~ SYM.% ~ "\""
Chengsong
parents:
diff changeset
   667
Chengsong
parents:
diff changeset
   668
Chengsong
parents:
diff changeset
   669
Chengsong
parents:
diff changeset
   670
  val WHILE_REGS = (("k" $ KEYWORD) | 
Chengsong
parents:
diff changeset
   671
                    ("i" $ ID) | 
Chengsong
parents:
diff changeset
   672
                    ("o" $ OP) | 
Chengsong
parents:
diff changeset
   673
                    ("n" $ NUM) | 
Chengsong
parents:
diff changeset
   674
                    ("s" $ SEMI) | 
Chengsong
parents:
diff changeset
   675
                    ("str" $ STRING) |
Chengsong
parents:
diff changeset
   676
                    ("p" $ (LPAREN | RPAREN)) | 
Chengsong
parents:
diff changeset
   677
                    ("b" $ (BEGIN | END)) | 
Chengsong
parents:
diff changeset
   678
                    ("w" $ WHITESPACE)).%
Chengsong
parents:
diff changeset
   679
Chengsong
parents:
diff changeset
   680
  val AWHILE_REGS = (
Chengsong
parents:
diff changeset
   681
    ALTS(
Chengsong
parents:
diff changeset
   682
      List(
Chengsong
parents:
diff changeset
   683
        ("k" $ AKEYWORD), 
Chengsong
parents:
diff changeset
   684
                    ("i" $ ID),
Chengsong
parents:
diff changeset
   685
                    ("o" $ AOP) , 
Chengsong
parents:
diff changeset
   686
                    ("n" $ NUM) ,
Chengsong
parents:
diff changeset
   687
                    ("s" $ SEMI) ,
Chengsong
parents:
diff changeset
   688
                    ("str" $ STRING), 
Chengsong
parents:
diff changeset
   689
                    ("p" $ (LPAREN | RPAREN)), 
Chengsong
parents:
diff changeset
   690
                    ("b" $ (BEGIN | END)), 
Chengsong
parents:
diff changeset
   691
                    ("w" $ WHITESPACE)
Chengsong
parents:
diff changeset
   692
      )
Chengsong
parents:
diff changeset
   693
    )
Chengsong
parents:
diff changeset
   694
  ).%
Chengsong
parents:
diff changeset
   695
Chengsong
parents:
diff changeset
   696
*/
Chengsong
parents:
diff changeset
   697
Chengsong
parents:
diff changeset
   698
Chengsong
parents:
diff changeset
   699
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   700
  /*
Chengsong
parents:
diff changeset
   701
  // Two Simple While programs
Chengsong
parents:
diff changeset
   702
  //========================
Chengsong
parents:
diff changeset
   703
  println("prog0 test")
Chengsong
parents:
diff changeset
   704
Chengsong
parents:
diff changeset
   705
  val prog0 = """read n"""
Chengsong
parents:
diff changeset
   706
  println(env(lexing_simp(WHILE_REGS, prog0)))
Chengsong
parents:
diff changeset
   707
  println(tokenise(WHILE_REGS, prog0))
Chengsong
parents:
diff changeset
   708
Chengsong
parents:
diff changeset
   709
  println("prog1 test")
Chengsong
parents:
diff changeset
   710
Chengsong
parents:
diff changeset
   711
  val prog1 = """read  n; write (n)"""
Chengsong
parents:
diff changeset
   712
  println(tokenise(WHILE_REGS, prog1))
Chengsong
parents:
diff changeset
   713
Chengsong
parents:
diff changeset
   714
  */
Chengsong
parents:
diff changeset
   715
  // Bigger Tests
Chengsong
parents:
diff changeset
   716
  //==============
Chengsong
parents:
diff changeset
   717
Chengsong
parents:
diff changeset
   718
  def escape(raw: String): String = {
Chengsong
parents:
diff changeset
   719
    import scala.reflect.runtime.universe._
Chengsong
parents:
diff changeset
   720
    Literal(Constant(raw)).toString
Chengsong
parents:
diff changeset
   721
  }
Chengsong
parents:
diff changeset
   722
Chengsong
parents:
diff changeset
   723
  val prog2 = """
Chengsong
parents:
diff changeset
   724
  write "Fib";
Chengsong
parents:
diff changeset
   725
  read n;
Chengsong
parents:
diff changeset
   726
  minus1 := 0;
Chengsong
parents:
diff changeset
   727
  minus2 := 1;
Chengsong
parents:
diff changeset
   728
  while n > 0 do {
Chengsong
parents:
diff changeset
   729
    temp := minus2;
Chengsong
parents:
diff changeset
   730
    minus2 := minus1 + minus2;
Chengsong
parents:
diff changeset
   731
    minus1 := temp;
Chengsong
parents:
diff changeset
   732
    n := n - 1
Chengsong
parents:
diff changeset
   733
  };
Chengsong
parents:
diff changeset
   734
  write "Result";
Chengsong
parents:
diff changeset
   735
  write minus2
Chengsong
parents:
diff changeset
   736
  """
Chengsong
parents:
diff changeset
   737
Chengsong
parents:
diff changeset
   738
  val prog3 = """
Chengsong
parents:
diff changeset
   739
  start := 1000;
Chengsong
parents:
diff changeset
   740
  x := start;
Chengsong
parents:
diff changeset
   741
  y := start;
Chengsong
parents:
diff changeset
   742
  z := start;
Chengsong
parents:
diff changeset
   743
  while 0 < x do {
Chengsong
parents:
diff changeset
   744
  while 0 < y do {
Chengsong
parents:
diff changeset
   745
    while 0 < z do {
Chengsong
parents:
diff changeset
   746
      z := z - 1
Chengsong
parents:
diff changeset
   747
    };
Chengsong
parents:
diff changeset
   748
    z := start;
Chengsong
parents:
diff changeset
   749
    y := y - 1
Chengsong
parents:
diff changeset
   750
  };     
Chengsong
parents:
diff changeset
   751
  y := start;
Chengsong
parents:
diff changeset
   752
  x := x - 1
Chengsong
parents:
diff changeset
   753
  }
Chengsong
parents:
diff changeset
   754
  """
Chengsong
parents:
diff changeset
   755
  /*
Chengsong
parents:
diff changeset
   756
  for(i <- 400 to 400 by 1){
Chengsong
parents:
diff changeset
   757
    println(i+":")
Chengsong
parents:
diff changeset
   758
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   759
  } */
Chengsong
parents:
diff changeset
   760
Chengsong
parents:
diff changeset
   761
    /*
Chengsong
parents:
diff changeset
   762
    for (i <- 2 to 5){
Chengsong
parents:
diff changeset
   763
      for(j <- 1 to 3){
Chengsong
parents:
diff changeset
   764
        println(i,j)
Chengsong
parents:
diff changeset
   765
        small_de = i
Chengsong
parents:
diff changeset
   766
        usual_de = i + j
Chengsong
parents:
diff changeset
   767
        big_de = i + 2*j 
Chengsong
parents:
diff changeset
   768
        blexing_simp(AWHILE_REGS, prog2 * 100)
Chengsong
parents:
diff changeset
   769
      }
Chengsong
parents:
diff changeset
   770
    }*/
Chengsong
parents:
diff changeset
   771
Chengsong
parents:
diff changeset
   772
  /*
Chengsong
parents:
diff changeset
   773
  println("Tokens of prog2")
Chengsong
parents:
diff changeset
   774
  println(tokenise(WHILE_REGS, prog2).mkString("\n"))
Chengsong
parents:
diff changeset
   775
Chengsong
parents:
diff changeset
   776
  val fib_tokens = tokenise(WHILE_REGS, prog2)
Chengsong
parents:
diff changeset
   777
  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   778
Chengsong
parents:
diff changeset
   779
Chengsong
parents:
diff changeset
   780
  val test_tokens = tokenise(WHILE_REGS, prog3)
Chengsong
parents:
diff changeset
   781
  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   782
  */
Chengsong
parents:
diff changeset
   783
Chengsong
parents:
diff changeset
   784
  /*
Chengsong
parents:
diff changeset
   785
  println("time test for blexing_simp")
Chengsong
parents:
diff changeset
   786
  for (i <- 1 to 1 by 1) {
Chengsong
parents:
diff changeset
   787
    lexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   788
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   789
    for( j <- 0 to each_simp_timeb.length - 1){
Chengsong
parents:
diff changeset
   790
      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
Chengsong
parents:
diff changeset
   791
        println(j, each_simp_timeb(j), each_simp_time(j))
Chengsong
parents:
diff changeset
   792
    }
Chengsong
parents:
diff changeset
   793
  }
Chengsong
parents:
diff changeset
   794
  */
Chengsong
parents:
diff changeset
   795
Chengsong
parents:
diff changeset
   796
Chengsong
parents:
diff changeset
   797
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   798
Chengsong
parents:
diff changeset
   799
Chengsong
parents:
diff changeset
   800
Chengsong
parents:
diff changeset
   801
  def clear() = {
Chengsong
parents:
diff changeset
   802
    print("")
Chengsong
parents:
diff changeset
   803
    //print("\33[H\33[2J")
Chengsong
parents:
diff changeset
   804
  }
Chengsong
parents:
diff changeset
   805
Chengsong
parents:
diff changeset
   806
  //testing the two lexings produce the same value
Chengsong
parents:
diff changeset
   807
  //enumerates strings of length n over alphabet cs
Chengsong
parents:
diff changeset
   808
  def strs(n: Int, cs: String) : Set[String] = {
Chengsong
parents:
diff changeset
   809
    if (n == 0) Set("")
Chengsong
parents:
diff changeset
   810
    else {
Chengsong
parents:
diff changeset
   811
      val ss = strs(n - 1, cs)
Chengsong
parents:
diff changeset
   812
      ss ++
Chengsong
parents:
diff changeset
   813
      (for (s <- ss; c <- cs.toList) yield c + s)
Chengsong
parents:
diff changeset
   814
    }
Chengsong
parents:
diff changeset
   815
  }
Chengsong
parents:
diff changeset
   816
  def enum(n: Int, s: String) : Stream[Rexp] = n match {
Chengsong
parents:
diff changeset
   817
    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
Chengsong
parents:
diff changeset
   818
    case n => {  
Chengsong
parents:
diff changeset
   819
      val rs = enum(n - 1, s)
Chengsong
parents:
diff changeset
   820
      rs #:::
Chengsong
parents:
diff changeset
   821
      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
Chengsong
parents:
diff changeset
   822
      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
Chengsong
parents:
diff changeset
   823
      (for (r1 <- rs) yield STAR(r1))
Chengsong
parents:
diff changeset
   824
    }
Chengsong
parents:
diff changeset
   825
  }
Chengsong
parents:
diff changeset
   826
Chengsong
parents:
diff changeset
   827
  //tests blexing and lexing
Chengsong
parents:
diff changeset
   828
  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
Chengsong
parents:
diff changeset
   829
    clear()
Chengsong
parents:
diff changeset
   830
    //println(s"Testing ${r}")
Chengsong
parents:
diff changeset
   831
    for (s <- ss.par) yield {
Chengsong
parents:
diff changeset
   832
      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
Chengsong
parents:
diff changeset
   833
      val res2 = Try(Some(blexing_simp(r, s))).getOrElse(None)
Chengsong
parents:
diff changeset
   834
      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
Chengsong
parents:
diff changeset
   835
      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
Chengsong
parents:
diff changeset
   836
      if (res1 != res2) Some((r, s)) else None
Chengsong
parents:
diff changeset
   837
    }
Chengsong
parents:
diff changeset
   838
  }
Chengsong
parents:
diff changeset
   839
Chengsong
parents:
diff changeset
   840
Chengsong
parents:
diff changeset
   841
Chengsong
parents:
diff changeset
   842
  //enum(3, "abc").map(tests_blexer_simp(strs(3, "abc"))).toSet
Chengsong
parents:
diff changeset
   843
  /*
Chengsong
parents:
diff changeset
   844
  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
Chengsong
parents:
diff changeset
   845
    for (s <- ss){
Chengsong
parents:
diff changeset
   846
      
Chengsong
parents:
diff changeset
   847
      val der_res = bder(c, ar) 
Chengsong
parents:
diff changeset
   848
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   849
      println(asize(der_res))
Chengsong
parents:
diff changeset
   850
      println(asize(simp_res))
Chengsong
parents:
diff changeset
   851
      single_expression_explorer(simp_res, (sc - c))
Chengsong
parents:
diff changeset
   852
    }
Chengsong
parents:
diff changeset
   853
  }*/
Chengsong
parents:
diff changeset
   854
Chengsong
parents:
diff changeset
   855
  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
Chengsong
parents:
diff changeset
   856
Chengsong
parents:
diff changeset
   857
Chengsong
parents:
diff changeset
   858
}
Chengsong
parents:
diff changeset
   859
Chengsong
parents:
diff changeset
   860
import Rexp.Bits
Chengsong
parents:
diff changeset
   861
abstract class ARexp 
Chengsong
parents:
diff changeset
   862
case object AZERO extends ARexp
Chengsong
parents:
diff changeset
   863
case class AONE(bs: Bits) extends ARexp
Chengsong
parents:
diff changeset
   864
case class ACHAR(bs: Bits, f: Char) extends ARexp
Chengsong
parents:
diff changeset
   865
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
Chengsong
parents:
diff changeset
   866
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   867
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   868
Chengsong
parents:
diff changeset
   869
Chengsong
parents:
diff changeset
   870
Chengsong
parents:
diff changeset
   871
abstract class Val
Chengsong
parents:
diff changeset
   872
case object Empty extends Val
Chengsong
parents:
diff changeset
   873
case class Chr(c: Char) extends Val
Chengsong
parents:
diff changeset
   874
case class Sequ(v1: Val, v2: Val) extends Val
Chengsong
parents:
diff changeset
   875
case class Left(v: Val) extends Val
Chengsong
parents:
diff changeset
   876
case class Right(v: Val) extends Val
Chengsong
parents:
diff changeset
   877
case class Stars(vs: List[Val]) extends Val
Chengsong
parents:
diff changeset
   878
case class Rec(x: String, v: Val) extends Val
Chengsong
parents:
diff changeset
   879
//case class Pos(i: Int, v: Val) extends Val
Chengsong
parents:
diff changeset
   880
case object Prd extends Val