lex_blex_Frankensteined.scala
author Chengsong
Sat, 16 Mar 2019 20:05:13 +0000
changeset 7 1572760ff866
parent 5 622ddbb1223a
child 11 9c1ca6d6e190
permissions -rw-r--r--
found the difference: caused by flats in ders_simp, the alts is at top most level , so no fuse and bits stay at the alts level whereas in ders + singele simp, the alts that should be the final top-level alts is not at the topmost level initially before simplification so it is opened up and bits fused. later it finds out itself the top level only aalts remaining, but the fuse is not reversible we do not know what happened either.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
Chengsong
parents:
diff changeset
     1
package RexpRelated
Chengsong
parents:
diff changeset
     2
import scala.language.implicitConversions    
Chengsong
parents:
diff changeset
     3
import scala.language.reflectiveCalls
Chengsong
parents:
diff changeset
     4
import scala.annotation.tailrec   
Chengsong
parents:
diff changeset
     5
import scala.util.Try
Chengsong
parents:
diff changeset
     6
Chengsong
parents:
diff changeset
     7
abstract class Bit
Chengsong
parents:
diff changeset
     8
case object Z extends Bit
Chengsong
parents:
diff changeset
     9
case object S extends Bit
Chengsong
parents:
diff changeset
    10
case class C(c: Char) extends Bit
Chengsong
parents:
diff changeset
    11
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
abstract class Rexp 
Chengsong
parents:
diff changeset
    14
case object ZERO extends Rexp
Chengsong
parents:
diff changeset
    15
case object ONE extends Rexp
Chengsong
parents:
diff changeset
    16
case class CHAR(c: Char) extends Rexp
Chengsong
parents:
diff changeset
    17
case class ALTS(rs: List[Rexp]) extends Rexp 
Chengsong
parents:
diff changeset
    18
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    19
case class STAR(r: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    20
case class RECD(x: String, r: Rexp) extends Rexp
Chengsong
parents:
diff changeset
    21
Chengsong
parents:
diff changeset
    22
Chengsong
parents:
diff changeset
    23
Chengsong
parents:
diff changeset
    24
object Rexp{
Chengsong
parents:
diff changeset
    25
  type Bits = List[Bit]
Chengsong
parents:
diff changeset
    26
  // abbreviations
Chengsong
parents:
diff changeset
    27
  type Mon = (Char, Rexp)
Chengsong
parents:
diff changeset
    28
  type Lin = Set[Mon]
Chengsong
parents:
diff changeset
    29
  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
Chengsong
parents:
diff changeset
    30
  def PLUS(r: Rexp) = SEQ(r, STAR(r))
Chengsong
parents:
diff changeset
    31
  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
Chengsong
parents:
diff changeset
    32
Chengsong
parents:
diff changeset
    33
Chengsong
parents:
diff changeset
    34
  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
Chengsong
parents:
diff changeset
    35
    case Nil => Nil
Chengsong
parents:
diff changeset
    36
    case (x::xs) => {
Chengsong
parents:
diff changeset
    37
      val res = f(x)
Chengsong
parents:
diff changeset
    38
      if (acc.contains(res)) distinctBy(xs, f, acc)  
Chengsong
parents:
diff changeset
    39
      else x::distinctBy(xs, f, res::acc)
Chengsong
parents:
diff changeset
    40
    }
Chengsong
parents:
diff changeset
    41
  } 
Chengsong
parents:
diff changeset
    42
  // some convenience for typing in regular expressions
Chengsong
parents:
diff changeset
    43
  def charlist2rexp(s : List[Char]): Rexp = s match {
Chengsong
parents:
diff changeset
    44
    case Nil => ONE
Chengsong
parents:
diff changeset
    45
    case c::Nil => CHAR(c)
Chengsong
parents:
diff changeset
    46
    case c::s => SEQ(CHAR(c), charlist2rexp(s))
Chengsong
parents:
diff changeset
    47
  }
Chengsong
parents:
diff changeset
    48
  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
Chengsong
parents:
diff changeset
    49
Chengsong
parents:
diff changeset
    50
  implicit def RexpOps(r: Rexp) = new {
Chengsong
parents:
diff changeset
    51
    def | (s: Rexp) = ALT(r, s)
Chengsong
parents:
diff changeset
    52
    def % = STAR(r)
Chengsong
parents:
diff changeset
    53
    def ~ (s: Rexp) = SEQ(r, s)
Chengsong
parents:
diff changeset
    54
  }
Chengsong
parents:
diff changeset
    55
Chengsong
parents:
diff changeset
    56
  implicit def stringOps(s: String) = new {
Chengsong
parents:
diff changeset
    57
    def | (r: Rexp) = ALT(s, r)
Chengsong
parents:
diff changeset
    58
    def | (r: String) = ALT(s, r)
Chengsong
parents:
diff changeset
    59
    def % = STAR(s)
Chengsong
parents:
diff changeset
    60
    def ~ (r: Rexp) = SEQ(s, r)
Chengsong
parents:
diff changeset
    61
    def ~ (r: String) = SEQ(s, r)
Chengsong
parents:
diff changeset
    62
    def $ (r: Rexp) = RECD(s, r)
Chengsong
parents:
diff changeset
    63
  }
Chengsong
parents:
diff changeset
    64
Chengsong
parents:
diff changeset
    65
  // translation into ARexps
Chengsong
parents:
diff changeset
    66
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    67
    case AZERO => AZERO
Chengsong
parents:
diff changeset
    68
    case AONE(cs) => AONE(bs ++ cs)
Chengsong
parents:
diff changeset
    69
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
Chengsong
parents:
diff changeset
    70
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
Chengsong
parents:
diff changeset
    71
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
Chengsong
parents:
diff changeset
    72
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
Chengsong
parents:
diff changeset
    73
  }
Chengsong
parents:
diff changeset
    74
Chengsong
parents:
diff changeset
    75
  def internalise(r: Rexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    76
    case ZERO => AZERO
Chengsong
parents:
diff changeset
    77
    case ONE => AONE(Nil)
Chengsong
parents:
diff changeset
    78
    case CHAR(c) => ACHAR(Nil, c)
Chengsong
parents:
diff changeset
    79
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
    80
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
Chengsong
parents:
diff changeset
    81
    case ALTS(r1::rs) => {
Chengsong
parents:
diff changeset
    82
      val AALTS(Nil, rs2) = internalise(ALTS(rs))
Chengsong
parents:
diff changeset
    83
      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
Chengsong
parents:
diff changeset
    84
    }
Chengsong
parents:
diff changeset
    85
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
Chengsong
parents:
diff changeset
    86
    case STAR(r) => ASTAR(Nil, internalise(r))
Chengsong
parents:
diff changeset
    87
    case RECD(x, r) => internalise(r)
Chengsong
parents:
diff changeset
    88
  }
Chengsong
parents:
diff changeset
    89
Chengsong
parents:
diff changeset
    90
  internalise(("a" | "ab") ~ ("b" | ""))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
    91
0
Chengsong
parents:
diff changeset
    92
  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
Chengsong
parents:
diff changeset
    93
    case (ONE, bs) => (Empty, bs)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
    94
    case (CHAR(f), C(c)::bs) => (Chr(c), bs)
0
Chengsong
parents:
diff changeset
    95
    case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems tailor made for those who want to simplify the regex before der or simp
Chengsong
parents:
diff changeset
    96
    case (ALTS(rs), bs) => bs match {
Chengsong
parents:
diff changeset
    97
      case Z::bs1 => {
Chengsong
parents:
diff changeset
    98
        val (v, bs2) = decode_aux(rs.head, bs1)
Chengsong
parents:
diff changeset
    99
        (Left(v), bs2)
Chengsong
parents:
diff changeset
   100
      }
Chengsong
parents:
diff changeset
   101
      case S::bs1 => {
Chengsong
parents:
diff changeset
   102
        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
Chengsong
parents:
diff changeset
   103
        (Right(v), bs2)			
Chengsong
parents:
diff changeset
   104
      }
Chengsong
parents:
diff changeset
   105
    }
Chengsong
parents:
diff changeset
   106
    case (SEQ(r1, r2), bs) => {
Chengsong
parents:
diff changeset
   107
      val (v1, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   108
      val (v2, bs2) = decode_aux(r2, bs1)
Chengsong
parents:
diff changeset
   109
      (Sequ(v1, v2), bs2)
Chengsong
parents:
diff changeset
   110
    }
Chengsong
parents:
diff changeset
   111
    case (STAR(r1), S::bs) => {
Chengsong
parents:
diff changeset
   112
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   113
      //println(v)
Chengsong
parents:
diff changeset
   114
      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
Chengsong
parents:
diff changeset
   115
      (Stars(v::vs), bs2)
Chengsong
parents:
diff changeset
   116
    }
Chengsong
parents:
diff changeset
   117
    case (STAR(_), Z::bs) => (Stars(Nil), bs)
Chengsong
parents:
diff changeset
   118
    case (RECD(x, r1), bs) => {
Chengsong
parents:
diff changeset
   119
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   120
      (Rec(x, v), bs1)
Chengsong
parents:
diff changeset
   121
    }
Chengsong
parents:
diff changeset
   122
  }
Chengsong
parents:
diff changeset
   123
Chengsong
parents:
diff changeset
   124
  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
Chengsong
parents:
diff changeset
   125
    case (v, Nil) => v
Chengsong
parents:
diff changeset
   126
    case _ => throw new Exception("Not decodable")
Chengsong
parents:
diff changeset
   127
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   128
0
Chengsong
parents:
diff changeset
   129
Chengsong
parents:
diff changeset
   130
  //erase function: extracts the regx from Aregex
Chengsong
parents:
diff changeset
   131
  def erase(r:ARexp): Rexp = r match{
Chengsong
parents:
diff changeset
   132
    case AZERO => ZERO
Chengsong
parents:
diff changeset
   133
    case AONE(_) => ONE
Chengsong
parents:
diff changeset
   134
    case ACHAR(bs, f) => CHAR(f)
Chengsong
parents:
diff changeset
   135
    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
Chengsong
parents:
diff changeset
   136
    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
Chengsong
parents:
diff changeset
   137
    case ASTAR(cs, r)=> STAR(erase(r))
Chengsong
parents:
diff changeset
   138
  }
Chengsong
parents:
diff changeset
   139
Chengsong
parents:
diff changeset
   140
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   141
  // nullable function: tests whether the regular 
Chengsong
parents:
diff changeset
   142
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   143
  def nullable (r: Rexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   144
    case ZERO => false
Chengsong
parents:
diff changeset
   145
    case ONE => true
Chengsong
parents:
diff changeset
   146
    case CHAR(_) => false
Chengsong
parents:
diff changeset
   147
    case ALTS(rs) => rs.exists(nullable)
Chengsong
parents:
diff changeset
   148
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
Chengsong
parents:
diff changeset
   149
    case STAR(_) => true
Chengsong
parents:
diff changeset
   150
    case RECD(_, r) => nullable(r)
Chengsong
parents:
diff changeset
   151
    //case PLUS(r) => nullable(r)
Chengsong
parents:
diff changeset
   152
  }
Chengsong
parents:
diff changeset
   153
Chengsong
parents:
diff changeset
   154
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   155
  def der (c: Char, r: Rexp) : Rexp = r match {
Chengsong
parents:
diff changeset
   156
    case ZERO => ZERO
Chengsong
parents:
diff changeset
   157
    case ONE => ZERO
Chengsong
parents:
diff changeset
   158
    case CHAR(f) => if (c == f) ONE else ZERO
Chengsong
parents:
diff changeset
   159
    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
Chengsong
parents:
diff changeset
   160
    case SEQ(r1, r2) => 
Chengsong
parents:
diff changeset
   161
      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
Chengsong
parents:
diff changeset
   162
      else SEQ(der(c, r1), r2)
Chengsong
parents:
diff changeset
   163
    case STAR(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   164
    case RECD(_, r1) => der(c, r1)
Chengsong
parents:
diff changeset
   165
    //case PLUS(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   166
  }
Chengsong
parents:
diff changeset
   167
Chengsong
parents:
diff changeset
   168
  def flatten(v: Val) : String = v match {
Chengsong
parents:
diff changeset
   169
    case Empty => ""
Chengsong
parents:
diff changeset
   170
    case Chr(c) => c.toString
Chengsong
parents:
diff changeset
   171
    case Left(v) => flatten(v)
Chengsong
parents:
diff changeset
   172
    case Right(v) => flatten(v)
Chengsong
parents:
diff changeset
   173
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
Chengsong
parents:
diff changeset
   174
    case Stars(vs) => vs.map(flatten).mkString
Chengsong
parents:
diff changeset
   175
    case Rec(_, v) => flatten(v)
Chengsong
parents:
diff changeset
   176
  }
Chengsong
parents:
diff changeset
   177
Chengsong
parents:
diff changeset
   178
  // extracts an environment from a value
Chengsong
parents:
diff changeset
   179
  def env(v: Val) : List[(String, String)] = v match {
Chengsong
parents:
diff changeset
   180
    case Empty => Nil
Chengsong
parents:
diff changeset
   181
    case Chr(c) => Nil
Chengsong
parents:
diff changeset
   182
    case Left(v) => env(v)
Chengsong
parents:
diff changeset
   183
    case Right(v) => env(v)
Chengsong
parents:
diff changeset
   184
    case Sequ(v1, v2) => env(v1) ::: env(v2)
Chengsong
parents:
diff changeset
   185
    case Stars(vs) => vs.flatMap(env)
Chengsong
parents:
diff changeset
   186
    case Rec(x, v) => (x, flatten(v))::env(v)
Chengsong
parents:
diff changeset
   187
  }
Chengsong
parents:
diff changeset
   188
Chengsong
parents:
diff changeset
   189
Chengsong
parents:
diff changeset
   190
  // injection part
Chengsong
parents:
diff changeset
   191
  def mkeps(r: Rexp) : Val = r match {
Chengsong
parents:
diff changeset
   192
    case ONE => Empty
Chengsong
parents:
diff changeset
   193
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
   194
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
Chengsong
parents:
diff changeset
   195
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
Chengsong
parents:
diff changeset
   196
    case STAR(r) => Stars(Nil)
Chengsong
parents:
diff changeset
   197
    case RECD(x, r) => Rec(x, mkeps(r))
Chengsong
parents:
diff changeset
   198
    //case PLUS(r) => Stars(List(mkeps(r)))
Chengsong
parents:
diff changeset
   199
  }
Chengsong
parents:
diff changeset
   200
Chengsong
parents:
diff changeset
   201
  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
Chengsong
parents:
diff changeset
   202
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   203
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   204
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   205
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
Chengsong
parents:
diff changeset
   206
    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
Chengsong
parents:
diff changeset
   207
    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
Chengsong
parents:
diff changeset
   208
    case (CHAR(_), Empty) => Chr(c) 
Chengsong
parents:
diff changeset
   209
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
Chengsong
parents:
diff changeset
   210
    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   211
  }
Chengsong
parents:
diff changeset
   212
  def lex(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   213
    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   214
    case c::cs => inj(r, c, lex(der(c, r), cs))
Chengsong
parents:
diff changeset
   215
  }
Chengsong
parents:
diff changeset
   216
Chengsong
parents:
diff changeset
   217
  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
Chengsong
parents:
diff changeset
   218
Chengsong
parents:
diff changeset
   219
  // some "rectification" functions for simplification
Chengsong
parents:
diff changeset
   220
  def F_ID(v: Val): Val = v
Chengsong
parents:
diff changeset
   221
  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
Chengsong
parents:
diff changeset
   222
  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
Chengsong
parents:
diff changeset
   223
  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   224
    case Right(v) => Right(f2(v))
Chengsong
parents:
diff changeset
   225
    case Left(v) => Left(f1(v))
Chengsong
parents:
diff changeset
   226
  }
Chengsong
parents:
diff changeset
   227
  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   228
    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
Chengsong
parents:
diff changeset
   229
  }
Chengsong
parents:
diff changeset
   230
  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   231
    (v:Val) => Sequ(f1(Empty), f2(v))
Chengsong
parents:
diff changeset
   232
  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   233
    (v:Val) => Sequ(f1(v), f2(Empty))
Chengsong
parents:
diff changeset
   234
  def F_RECD(f: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   235
    case Rec(x, v) => Rec(x, f(v))
Chengsong
parents:
diff changeset
   236
  }
Chengsong
parents:
diff changeset
   237
  def F_ERROR(v: Val): Val = throw new Exception("error")
Chengsong
parents:
diff changeset
   238
Chengsong
parents:
diff changeset
   239
  // simplification of regular expressions returning also an
Chengsong
parents:
diff changeset
   240
  // rectification function; no simplification under STAR 
Chengsong
parents:
diff changeset
   241
  def simp(r: Rexp): (Rexp, Val => Val) = r match {
Chengsong
parents:
diff changeset
   242
    case ALTS(List(r1, r2)) => {
Chengsong
parents:
diff changeset
   243
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   244
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   245
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   246
        case (ZERO, _) => (r2s, F_RIGHT(f2s))
Chengsong
parents:
diff changeset
   247
        case (_, ZERO) => (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   248
        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   249
                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
Chengsong
parents:
diff changeset
   250
      }
Chengsong
parents:
diff changeset
   251
    }
Chengsong
parents:
diff changeset
   252
    case SEQ(r1, r2) => {
Chengsong
parents:
diff changeset
   253
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   254
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   255
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   256
        case (ZERO, _) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   257
        case (_, ZERO) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   258
        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
Chengsong
parents:
diff changeset
   259
        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
Chengsong
parents:
diff changeset
   260
        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
Chengsong
parents:
diff changeset
   261
      }
Chengsong
parents:
diff changeset
   262
    }
Chengsong
parents:
diff changeset
   263
    case RECD(x, r1) => {
Chengsong
parents:
diff changeset
   264
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   265
      (RECD(x, r1s), F_RECD(f1s))
Chengsong
parents:
diff changeset
   266
    }
Chengsong
parents:
diff changeset
   267
    case r => (r, F_ID)
Chengsong
parents:
diff changeset
   268
  }
Chengsong
parents:
diff changeset
   269
  /*
Chengsong
parents:
diff changeset
   270
  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   271
  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   272
  */
Chengsong
parents:
diff changeset
   273
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   274
    case Nil => {
Chengsong
parents:
diff changeset
   275
      if (nullable(r)) {
Chengsong
parents:
diff changeset
   276
        mkeps(r) 
Chengsong
parents:
diff changeset
   277
      }
Chengsong
parents:
diff changeset
   278
      else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   279
    }
Chengsong
parents:
diff changeset
   280
    case c::cs => {
Chengsong
parents:
diff changeset
   281
      val (r_simp, f_simp) = simp(der(c, r))
Chengsong
parents:
diff changeset
   282
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
Chengsong
parents:
diff changeset
   283
    }
Chengsong
parents:
diff changeset
   284
  }
Chengsong
parents:
diff changeset
   285
Chengsong
parents:
diff changeset
   286
  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
Chengsong
parents:
diff changeset
   287
Chengsong
parents:
diff changeset
   288
  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
Chengsong
parents:
diff changeset
   289
Chengsong
parents:
diff changeset
   290
  // filters out all white spaces
Chengsong
parents:
diff changeset
   291
  def tokenise(r: Rexp, s: String) = 
Chengsong
parents:
diff changeset
   292
    env(lexing_simp(r, s)).filterNot { _._1 == "w"}
Chengsong
parents:
diff changeset
   293
Chengsong
parents:
diff changeset
   294
Chengsong
parents:
diff changeset
   295
  //reads the string from a file 
Chengsong
parents:
diff changeset
   296
  def fromFile(name: String) : String = 
Chengsong
parents:
diff changeset
   297
    io.Source.fromFile(name).mkString
Chengsong
parents:
diff changeset
   298
Chengsong
parents:
diff changeset
   299
  def tokenise_file(r: Rexp, name: String) = 
Chengsong
parents:
diff changeset
   300
    tokenise(r, fromFile(name))
Chengsong
parents:
diff changeset
   301
  
Chengsong
parents:
diff changeset
   302
  //   Testing
Chengsong
parents:
diff changeset
   303
  //============
Chengsong
parents:
diff changeset
   304
Chengsong
parents:
diff changeset
   305
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   306
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   307
    val result = code
Chengsong
parents:
diff changeset
   308
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   309
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   310
    result
Chengsong
parents:
diff changeset
   311
  }
Chengsong
parents:
diff changeset
   312
Chengsong
parents:
diff changeset
   313
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   314
Chengsong
parents:
diff changeset
   315
  // bnullable function: tests whether the aregular 
Chengsong
parents:
diff changeset
   316
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   317
  def bnullable (r: ARexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   318
    case AZERO => false
Chengsong
parents:
diff changeset
   319
    case AONE(_) => true
Chengsong
parents:
diff changeset
   320
    case ACHAR(_,_) => false
Chengsong
parents:
diff changeset
   321
    case AALTS(_, rs) => rs.exists(bnullable)
Chengsong
parents:
diff changeset
   322
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
Chengsong
parents:
diff changeset
   323
    case ASTAR(_, _) => true
Chengsong
parents:
diff changeset
   324
  }
Chengsong
parents:
diff changeset
   325
Chengsong
parents:
diff changeset
   326
  def mkepsBC(r: ARexp) : Bits = r match {
Chengsong
parents:
diff changeset
   327
    case AONE(bs) => bs
Chengsong
parents:
diff changeset
   328
    case AALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   329
      val n = rs.indexWhere(bnullable)
Chengsong
parents:
diff changeset
   330
      bs ++ mkepsBC(rs(n))
Chengsong
parents:
diff changeset
   331
    }
Chengsong
parents:
diff changeset
   332
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
Chengsong
parents:
diff changeset
   333
    case ASTAR(bs, r) => bs ++ List(Z)
Chengsong
parents:
diff changeset
   334
  }
Chengsong
parents:
diff changeset
   335
Chengsong
parents:
diff changeset
   336
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   337
  def bder(c: Char, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
   338
    case AZERO => AZERO
Chengsong
parents:
diff changeset
   339
    case AONE(_) => AZERO
Chengsong
parents:
diff changeset
   340
    case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
Chengsong
parents:
diff changeset
   341
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
Chengsong
parents:
diff changeset
   342
    case ASEQ(bs, r1, r2) => 
Chengsong
parents:
diff changeset
   343
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
Chengsong
parents:
diff changeset
   344
      else ASEQ(bs, bder(c, r1), r2)
Chengsong
parents:
diff changeset
   345
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
Chengsong
parents:
diff changeset
   346
  }
Chengsong
parents:
diff changeset
   347
Chengsong
parents:
diff changeset
   348
Chengsong
parents:
diff changeset
   349
  def ders (s: List[Char], r: Rexp) : Rexp = s match {
Chengsong
parents:
diff changeset
   350
    case Nil => r
Chengsong
parents:
diff changeset
   351
    case c::s => ders(s, der(c, r))
Chengsong
parents:
diff changeset
   352
  }
Chengsong
parents:
diff changeset
   353
Chengsong
parents:
diff changeset
   354
  // derivative w.r.t. a string (iterates bder)
Chengsong
parents:
diff changeset
   355
  @tailrec
Chengsong
parents:
diff changeset
   356
  def bders (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   357
    case Nil => r
Chengsong
parents:
diff changeset
   358
    case c::s => bders(s, bder(c, r))
Chengsong
parents:
diff changeset
   359
  }
Chengsong
parents:
diff changeset
   360
Chengsong
parents:
diff changeset
   361
  def flats(rs: List[ARexp]): List[ARexp] = rs match {
Chengsong
parents:
diff changeset
   362
      case Nil => Nil
Chengsong
parents:
diff changeset
   363
      case AZERO :: rs1 => flats(rs1)
Chengsong
parents:
diff changeset
   364
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
Chengsong
parents:
diff changeset
   365
      case r1 :: rs2 => r1 :: flats(rs2)
Chengsong
parents:
diff changeset
   366
    }
Chengsong
parents:
diff changeset
   367
  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
Chengsong
parents:
diff changeset
   368
    case Nil => Nil
Chengsong
parents:
diff changeset
   369
    case ZERO :: rs1 => rflats(rs1)
Chengsong
parents:
diff changeset
   370
    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
Chengsong
parents:
diff changeset
   371
    case r1 :: rs2 => r1 :: rflats(rs2)
Chengsong
parents:
diff changeset
   372
  }
Chengsong
parents:
diff changeset
   373
  var flats_time = 0L
Chengsong
parents:
diff changeset
   374
  var dist_time = 0L
Chengsong
parents:
diff changeset
   375
  
Chengsong
parents:
diff changeset
   376
  def bsimp(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
   377
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
Chengsong
parents:
diff changeset
   378
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   379
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   380
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   381
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   382
    }
Chengsong
parents:
diff changeset
   383
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   384
      val rs_simp = rs.map(bsimp)
Chengsong
parents:
diff changeset
   385
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   386
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   387
      dist_res match {
Chengsong
parents:
diff changeset
   388
        case Nil => AZERO
Chengsong
parents:
diff changeset
   389
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   390
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   391
      }
Chengsong
parents:
diff changeset
   392
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   393
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   394
    case r => r
Chengsong
parents:
diff changeset
   395
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   396
  def super_bsimp(r: ARexp): ARexp = r match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   397
    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
0
Chengsong
parents:
diff changeset
   398
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   399
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   400
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   401
        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)}) ) 
0
Chengsong
parents:
diff changeset
   402
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   403
    }
Chengsong
parents:
diff changeset
   404
    case AALTS(bs1, rs) => {
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   405
      val rs_simp = rs.map(super_bsimp)
0
Chengsong
parents:
diff changeset
   406
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   407
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   408
      dist_res match {
Chengsong
parents:
diff changeset
   409
        case Nil => AZERO
Chengsong
parents:
diff changeset
   410
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   411
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   412
      }
Chengsong
parents:
diff changeset
   413
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   414
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   415
    case r => r
Chengsong
parents:
diff changeset
   416
  }
Chengsong
parents:
diff changeset
   417
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   418
0
Chengsong
parents:
diff changeset
   419
  def simp_weakened(r: Rexp): Rexp = r match {
Chengsong
parents:
diff changeset
   420
    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
Chengsong
parents:
diff changeset
   421
        case (ZERO, _) => ZERO
Chengsong
parents:
diff changeset
   422
        case (_, ZERO) => ZERO
Chengsong
parents:
diff changeset
   423
        case (ONE, r2s) => r2s
Chengsong
parents:
diff changeset
   424
        case (r1s, r2s) => SEQ(r1s, r2s)
Chengsong
parents:
diff changeset
   425
    }
Chengsong
parents:
diff changeset
   426
    case ALTS(rs) => {
Chengsong
parents:
diff changeset
   427
      val rs_simp = rs.map(simp_weakened)
Chengsong
parents:
diff changeset
   428
      val flat_res = rflats(rs_simp)
Chengsong
parents:
diff changeset
   429
      val dist_res = rs_simp.distinct
Chengsong
parents:
diff changeset
   430
      dist_res match {
Chengsong
parents:
diff changeset
   431
        case Nil => ZERO
Chengsong
parents:
diff changeset
   432
        case s :: Nil => s
Chengsong
parents:
diff changeset
   433
        case rs => ALTS(rs)  
Chengsong
parents:
diff changeset
   434
      }
Chengsong
parents:
diff changeset
   435
    }
Chengsong
parents:
diff changeset
   436
    case STAR(r) => STAR(simp_weakened(r))
Chengsong
parents:
diff changeset
   437
    case r => r
Chengsong
parents:
diff changeset
   438
  }
Chengsong
parents:
diff changeset
   439
    
Chengsong
parents:
diff changeset
   440
  
Chengsong
parents:
diff changeset
   441
  //----------------------------------------------------------------------------experiment bsimp
Chengsong
parents:
diff changeset
   442
  /*
Chengsong
parents:
diff changeset
   443
  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   444
    case Nil => r
Chengsong
parents:
diff changeset
   445
    case c::s => bders_simp(s, bsimp(bder(c, r)))
Chengsong
parents:
diff changeset
   446
  }
Chengsong
parents:
diff changeset
   447
  */
Chengsong
parents:
diff changeset
   448
  /*
Chengsong
parents:
diff changeset
   449
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   450
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   451
    val result = code
Chengsong
parents:
diff changeset
   452
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   453
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   454
    result
Chengsong
parents:
diff changeset
   455
  }
Chengsong
parents:
diff changeset
   456
  */
Chengsong
parents:
diff changeset
   457
  // main unsimplified lexing function (produces a value)
Chengsong
parents:
diff changeset
   458
  def blex(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   459
    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   460
    case c::cs => {
Chengsong
parents:
diff changeset
   461
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   462
      blex(der_res, cs)
Chengsong
parents:
diff changeset
   463
    }
Chengsong
parents:
diff changeset
   464
  }
Chengsong
parents:
diff changeset
   465
Chengsong
parents:
diff changeset
   466
  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
Chengsong
parents:
diff changeset
   467
  //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
Chengsong
parents:
diff changeset
   468
Chengsong
parents:
diff changeset
   469
  var bder_time = 0L
Chengsong
parents:
diff changeset
   470
  var bsimp_time = 0L
Chengsong
parents:
diff changeset
   471
  var mkepsBC_time = 0L
Chengsong
parents:
diff changeset
   472
  var small_de = 2
Chengsong
parents:
diff changeset
   473
  var big_de = 5
Chengsong
parents:
diff changeset
   474
  var usual_de = 3
Chengsong
parents:
diff changeset
   475
  
Chengsong
parents:
diff changeset
   476
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   477
    case Nil => {
Chengsong
parents:
diff changeset
   478
      if (bnullable(r)) {
Chengsong
parents:
diff changeset
   479
        //println(asize(r))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   480
        mkepsBC(r)
0
Chengsong
parents:
diff changeset
   481
      }
Chengsong
parents:
diff changeset
   482
    else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   483
    }
Chengsong
parents:
diff changeset
   484
    case c::cs => {
Chengsong
parents:
diff changeset
   485
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   486
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   487
      blex_simp(simp_res, cs)      
Chengsong
parents:
diff changeset
   488
    }
Chengsong
parents:
diff changeset
   489
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   490
  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   491
    case Nil => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   492
      if (bnullable(r)) {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   493
        mkepsBC(r)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   494
      }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   495
      else throw new Exception("Not matched")
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   496
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   497
    case c::cs => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   498
      super_blex_simp(super_bsimp(bder(c,r)), cs)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   499
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   500
  }
0
Chengsong
parents:
diff changeset
   501
  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
Chengsong
parents:
diff changeset
   502
    case Nil => r
Chengsong
parents:
diff changeset
   503
    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
Chengsong
parents:
diff changeset
   504
  }
Chengsong
parents:
diff changeset
   505
Chengsong
parents:
diff changeset
   506
Chengsong
parents:
diff changeset
   507
  //size: of a Aregx for testing purposes 
Chengsong
parents:
diff changeset
   508
  def size(r: Rexp) : Int = r match {
Chengsong
parents:
diff changeset
   509
    case ZERO => 1
Chengsong
parents:
diff changeset
   510
    case ONE => 1
Chengsong
parents:
diff changeset
   511
    case CHAR(_) => 1
Chengsong
parents:
diff changeset
   512
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
Chengsong
parents:
diff changeset
   513
    case ALTS(rs) => 1 + rs.map(size).sum
Chengsong
parents:
diff changeset
   514
    case STAR(r) => 1 + size(r)
Chengsong
parents:
diff changeset
   515
  }
Chengsong
parents:
diff changeset
   516
Chengsong
parents:
diff changeset
   517
  def asize(a: ARexp) = size(erase(a))
Chengsong
parents:
diff changeset
   518
Chengsong
parents:
diff changeset
   519
Chengsong
parents:
diff changeset
   520
  // decoding does not work yet
Chengsong
parents:
diff changeset
   521
  def blexing_simp(r: Rexp, s: String) = {
Chengsong
parents:
diff changeset
   522
    val bit_code = blex_simp(internalise(r), s.toList)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   523
    decode(r, bit_code) 
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   524
  }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   525
  def super_blexing_simp(r: Rexp, s: String) = {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   526
    decode(r, super_blex_simp(internalise(r), s.toList))
0
Chengsong
parents:
diff changeset
   527
  }
Chengsong
parents:
diff changeset
   528
Chengsong
parents:
diff changeset
   529
Chengsong
parents:
diff changeset
   530
Chengsong
parents:
diff changeset
   531
Chengsong
parents:
diff changeset
   532
Chengsong
parents:
diff changeset
   533
  // Lexing Rules for a Small While Language
Chengsong
parents:
diff changeset
   534
Chengsong
parents:
diff changeset
   535
  //symbols
Chengsong
parents:
diff changeset
   536
  /*
Chengsong
parents:
diff changeset
   537
  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
Chengsong
parents:
diff changeset
   538
  
Chengsong
parents:
diff changeset
   539
  //digits
Chengsong
parents:
diff changeset
   540
  val DIGIT = PRED("0123456789".contains(_))
Chengsong
parents:
diff changeset
   541
  //identifiers
Chengsong
parents:
diff changeset
   542
  val ID = SYM ~ (SYM | DIGIT).% 
Chengsong
parents:
diff changeset
   543
  //numbers
Chengsong
parents:
diff changeset
   544
  val NUM = STAR(DIGIT)
Chengsong
parents:
diff changeset
   545
  //keywords
Chengsong
parents:
diff changeset
   546
  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
Chengsong
parents:
diff changeset
   547
  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
Chengsong
parents:
diff changeset
   548
  //semicolons
Chengsong
parents:
diff changeset
   549
  val SEMI: Rexp = ";"
Chengsong
parents:
diff changeset
   550
  //operators
Chengsong
parents:
diff changeset
   551
  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
Chengsong
parents:
diff changeset
   552
  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
Chengsong
parents:
diff changeset
   553
  //whitespaces
Chengsong
parents:
diff changeset
   554
  val WHITESPACE = PLUS(" " | "\n" | "\t")
Chengsong
parents:
diff changeset
   555
  //parentheses
Chengsong
parents:
diff changeset
   556
  val RPAREN: Rexp = ")"
Chengsong
parents:
diff changeset
   557
  val LPAREN: Rexp = "("
Chengsong
parents:
diff changeset
   558
  val BEGIN: Rexp = "{"
Chengsong
parents:
diff changeset
   559
  val END: Rexp = "}"
Chengsong
parents:
diff changeset
   560
  //strings...but probably needs not
Chengsong
parents:
diff changeset
   561
  val STRING: Rexp = "\"" ~ SYM.% ~ "\""
Chengsong
parents:
diff changeset
   562
Chengsong
parents:
diff changeset
   563
Chengsong
parents:
diff changeset
   564
Chengsong
parents:
diff changeset
   565
  val WHILE_REGS = (("k" $ KEYWORD) | 
Chengsong
parents:
diff changeset
   566
                    ("i" $ ID) | 
Chengsong
parents:
diff changeset
   567
                    ("o" $ OP) | 
Chengsong
parents:
diff changeset
   568
                    ("n" $ NUM) | 
Chengsong
parents:
diff changeset
   569
                    ("s" $ SEMI) | 
Chengsong
parents:
diff changeset
   570
                    ("str" $ STRING) |
Chengsong
parents:
diff changeset
   571
                    ("p" $ (LPAREN | RPAREN)) | 
Chengsong
parents:
diff changeset
   572
                    ("b" $ (BEGIN | END)) | 
Chengsong
parents:
diff changeset
   573
                    ("w" $ WHITESPACE)).%
Chengsong
parents:
diff changeset
   574
Chengsong
parents:
diff changeset
   575
  val AWHILE_REGS = (
Chengsong
parents:
diff changeset
   576
    ALTS(
Chengsong
parents:
diff changeset
   577
      List(
Chengsong
parents:
diff changeset
   578
        ("k" $ AKEYWORD), 
Chengsong
parents:
diff changeset
   579
                    ("i" $ ID),
Chengsong
parents:
diff changeset
   580
                    ("o" $ AOP) , 
Chengsong
parents:
diff changeset
   581
                    ("n" $ NUM) ,
Chengsong
parents:
diff changeset
   582
                    ("s" $ SEMI) ,
Chengsong
parents:
diff changeset
   583
                    ("str" $ STRING), 
Chengsong
parents:
diff changeset
   584
                    ("p" $ (LPAREN | RPAREN)), 
Chengsong
parents:
diff changeset
   585
                    ("b" $ (BEGIN | END)), 
Chengsong
parents:
diff changeset
   586
                    ("w" $ WHITESPACE)
Chengsong
parents:
diff changeset
   587
      )
Chengsong
parents:
diff changeset
   588
    )
Chengsong
parents:
diff changeset
   589
  ).%
Chengsong
parents:
diff changeset
   590
Chengsong
parents:
diff changeset
   591
*/
Chengsong
parents:
diff changeset
   592
Chengsong
parents:
diff changeset
   593
Chengsong
parents:
diff changeset
   594
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   595
  /*
Chengsong
parents:
diff changeset
   596
  // Two Simple While programs
Chengsong
parents:
diff changeset
   597
  //========================
Chengsong
parents:
diff changeset
   598
  println("prog0 test")
Chengsong
parents:
diff changeset
   599
Chengsong
parents:
diff changeset
   600
  val prog0 = """read n"""
Chengsong
parents:
diff changeset
   601
  println(env(lexing_simp(WHILE_REGS, prog0)))
Chengsong
parents:
diff changeset
   602
  println(tokenise(WHILE_REGS, prog0))
Chengsong
parents:
diff changeset
   603
Chengsong
parents:
diff changeset
   604
  println("prog1 test")
Chengsong
parents:
diff changeset
   605
Chengsong
parents:
diff changeset
   606
  val prog1 = """read  n; write (n)"""
Chengsong
parents:
diff changeset
   607
  println(tokenise(WHILE_REGS, prog1))
Chengsong
parents:
diff changeset
   608
Chengsong
parents:
diff changeset
   609
  */
Chengsong
parents:
diff changeset
   610
  // Bigger Tests
Chengsong
parents:
diff changeset
   611
  //==============
Chengsong
parents:
diff changeset
   612
Chengsong
parents:
diff changeset
   613
  def escape(raw: String): String = {
Chengsong
parents:
diff changeset
   614
    import scala.reflect.runtime.universe._
Chengsong
parents:
diff changeset
   615
    Literal(Constant(raw)).toString
Chengsong
parents:
diff changeset
   616
  }
Chengsong
parents:
diff changeset
   617
Chengsong
parents:
diff changeset
   618
  val prog2 = """
Chengsong
parents:
diff changeset
   619
  write "Fib";
Chengsong
parents:
diff changeset
   620
  read n;
Chengsong
parents:
diff changeset
   621
  minus1 := 0;
Chengsong
parents:
diff changeset
   622
  minus2 := 1;
Chengsong
parents:
diff changeset
   623
  while n > 0 do {
Chengsong
parents:
diff changeset
   624
    temp := minus2;
Chengsong
parents:
diff changeset
   625
    minus2 := minus1 + minus2;
Chengsong
parents:
diff changeset
   626
    minus1 := temp;
Chengsong
parents:
diff changeset
   627
    n := n - 1
Chengsong
parents:
diff changeset
   628
  };
Chengsong
parents:
diff changeset
   629
  write "Result";
Chengsong
parents:
diff changeset
   630
  write minus2
Chengsong
parents:
diff changeset
   631
  """
Chengsong
parents:
diff changeset
   632
Chengsong
parents:
diff changeset
   633
  val prog3 = """
Chengsong
parents:
diff changeset
   634
  start := 1000;
Chengsong
parents:
diff changeset
   635
  x := start;
Chengsong
parents:
diff changeset
   636
  y := start;
Chengsong
parents:
diff changeset
   637
  z := start;
Chengsong
parents:
diff changeset
   638
  while 0 < x do {
Chengsong
parents:
diff changeset
   639
  while 0 < y do {
Chengsong
parents:
diff changeset
   640
    while 0 < z do {
Chengsong
parents:
diff changeset
   641
      z := z - 1
Chengsong
parents:
diff changeset
   642
    };
Chengsong
parents:
diff changeset
   643
    z := start;
Chengsong
parents:
diff changeset
   644
    y := y - 1
Chengsong
parents:
diff changeset
   645
  };     
Chengsong
parents:
diff changeset
   646
  y := start;
Chengsong
parents:
diff changeset
   647
  x := x - 1
Chengsong
parents:
diff changeset
   648
  }
Chengsong
parents:
diff changeset
   649
  """
Chengsong
parents:
diff changeset
   650
  /*
Chengsong
parents:
diff changeset
   651
  for(i <- 400 to 400 by 1){
Chengsong
parents:
diff changeset
   652
    println(i+":")
Chengsong
parents:
diff changeset
   653
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   654
  } */
Chengsong
parents:
diff changeset
   655
Chengsong
parents:
diff changeset
   656
    /*
Chengsong
parents:
diff changeset
   657
    for (i <- 2 to 5){
Chengsong
parents:
diff changeset
   658
      for(j <- 1 to 3){
Chengsong
parents:
diff changeset
   659
        println(i,j)
Chengsong
parents:
diff changeset
   660
        small_de = i
Chengsong
parents:
diff changeset
   661
        usual_de = i + j
Chengsong
parents:
diff changeset
   662
        big_de = i + 2*j 
Chengsong
parents:
diff changeset
   663
        blexing_simp(AWHILE_REGS, prog2 * 100)
Chengsong
parents:
diff changeset
   664
      }
Chengsong
parents:
diff changeset
   665
    }*/
Chengsong
parents:
diff changeset
   666
Chengsong
parents:
diff changeset
   667
  /*
Chengsong
parents:
diff changeset
   668
  println("Tokens of prog2")
Chengsong
parents:
diff changeset
   669
  println(tokenise(WHILE_REGS, prog2).mkString("\n"))
Chengsong
parents:
diff changeset
   670
Chengsong
parents:
diff changeset
   671
  val fib_tokens = tokenise(WHILE_REGS, prog2)
Chengsong
parents:
diff changeset
   672
  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   673
Chengsong
parents:
diff changeset
   674
Chengsong
parents:
diff changeset
   675
  val test_tokens = tokenise(WHILE_REGS, prog3)
Chengsong
parents:
diff changeset
   676
  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   677
  */
Chengsong
parents:
diff changeset
   678
Chengsong
parents:
diff changeset
   679
  /*
Chengsong
parents:
diff changeset
   680
  println("time test for blexing_simp")
Chengsong
parents:
diff changeset
   681
  for (i <- 1 to 1 by 1) {
Chengsong
parents:
diff changeset
   682
    lexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   683
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   684
    for( j <- 0 to each_simp_timeb.length - 1){
Chengsong
parents:
diff changeset
   685
      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
Chengsong
parents:
diff changeset
   686
        println(j, each_simp_timeb(j), each_simp_time(j))
Chengsong
parents:
diff changeset
   687
    }
Chengsong
parents:
diff changeset
   688
  }
Chengsong
parents:
diff changeset
   689
  */
Chengsong
parents:
diff changeset
   690
Chengsong
parents:
diff changeset
   691
Chengsong
parents:
diff changeset
   692
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   693
Chengsong
parents:
diff changeset
   694
Chengsong
parents:
diff changeset
   695
Chengsong
parents:
diff changeset
   696
  def clear() = {
Chengsong
parents:
diff changeset
   697
    print("")
Chengsong
parents:
diff changeset
   698
    //print("\33[H\33[2J")
Chengsong
parents:
diff changeset
   699
  }
Chengsong
parents:
diff changeset
   700
Chengsong
parents:
diff changeset
   701
  //testing the two lexings produce the same value
Chengsong
parents:
diff changeset
   702
  //enumerates strings of length n over alphabet cs
Chengsong
parents:
diff changeset
   703
  def strs(n: Int, cs: String) : Set[String] = {
Chengsong
parents:
diff changeset
   704
    if (n == 0) Set("")
Chengsong
parents:
diff changeset
   705
    else {
Chengsong
parents:
diff changeset
   706
      val ss = strs(n - 1, cs)
Chengsong
parents:
diff changeset
   707
      ss ++
Chengsong
parents:
diff changeset
   708
      (for (s <- ss; c <- cs.toList) yield c + s)
Chengsong
parents:
diff changeset
   709
    }
Chengsong
parents:
diff changeset
   710
  }
Chengsong
parents:
diff changeset
   711
  def enum(n: Int, s: String) : Stream[Rexp] = n match {
Chengsong
parents:
diff changeset
   712
    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
Chengsong
parents:
diff changeset
   713
    case n => {  
Chengsong
parents:
diff changeset
   714
      val rs = enum(n - 1, s)
Chengsong
parents:
diff changeset
   715
      rs #:::
Chengsong
parents:
diff changeset
   716
      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
Chengsong
parents:
diff changeset
   717
      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
Chengsong
parents:
diff changeset
   718
      (for (r1 <- rs) yield STAR(r1))
Chengsong
parents:
diff changeset
   719
    }
Chengsong
parents:
diff changeset
   720
  }
Chengsong
parents:
diff changeset
   721
Chengsong
parents:
diff changeset
   722
  //tests blexing and lexing
Chengsong
parents:
diff changeset
   723
  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
Chengsong
parents:
diff changeset
   724
    clear()
Chengsong
parents:
diff changeset
   725
    //println(s"Testing ${r}")
Chengsong
parents:
diff changeset
   726
    for (s <- ss.par) yield {
Chengsong
parents:
diff changeset
   727
      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   728
      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
0
Chengsong
parents:
diff changeset
   729
      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
Chengsong
parents:
diff changeset
   730
      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
Chengsong
parents:
diff changeset
   731
      if (res1 != res2) Some((r, s)) else None
Chengsong
parents:
diff changeset
   732
    }
Chengsong
parents:
diff changeset
   733
  }
Chengsong
parents:
diff changeset
   734
Chengsong
parents:
diff changeset
   735
Chengsong
parents:
diff changeset
   736
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   737
  
0
Chengsong
parents:
diff changeset
   738
  /*
Chengsong
parents:
diff changeset
   739
  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
Chengsong
parents:
diff changeset
   740
    for (s <- ss){
Chengsong
parents:
diff changeset
   741
      
Chengsong
parents:
diff changeset
   742
      val der_res = bder(c, ar) 
Chengsong
parents:
diff changeset
   743
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   744
      println(asize(der_res))
Chengsong
parents:
diff changeset
   745
      println(asize(simp_res))
Chengsong
parents:
diff changeset
   746
      single_expression_explorer(simp_res, (sc - c))
Chengsong
parents:
diff changeset
   747
    }
Chengsong
parents:
diff changeset
   748
  }*/
Chengsong
parents:
diff changeset
   749
Chengsong
parents:
diff changeset
   750
  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
Chengsong
parents:
diff changeset
   751
Chengsong
parents:
diff changeset
   752
Chengsong
parents:
diff changeset
   753
}
Chengsong
parents:
diff changeset
   754
Chengsong
parents:
diff changeset
   755
import Rexp.Bits
Chengsong
parents:
diff changeset
   756
abstract class ARexp 
Chengsong
parents:
diff changeset
   757
case object AZERO extends ARexp
Chengsong
parents:
diff changeset
   758
case class AONE(bs: Bits) extends ARexp
Chengsong
parents:
diff changeset
   759
case class ACHAR(bs: Bits, f: Char) extends ARexp
Chengsong
parents:
diff changeset
   760
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
Chengsong
parents:
diff changeset
   761
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   762
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   763
Chengsong
parents:
diff changeset
   764
Chengsong
parents:
diff changeset
   765
Chengsong
parents:
diff changeset
   766
abstract class Val
Chengsong
parents:
diff changeset
   767
case object Empty extends Val
Chengsong
parents:
diff changeset
   768
case class Chr(c: Char) extends Val
Chengsong
parents:
diff changeset
   769
case class Sequ(v1: Val, v2: Val) extends Val
Chengsong
parents:
diff changeset
   770
case class Left(v: Val) extends Val
Chengsong
parents:
diff changeset
   771
case class Right(v: Val) extends Val
Chengsong
parents:
diff changeset
   772
case class Stars(vs: List[Val]) extends Val
Chengsong
parents:
diff changeset
   773
case class Rec(x: String, v: Val) extends Val
Chengsong
parents:
diff changeset
   774
//case class Pos(i: Int, v: Val) extends Val
Chengsong
parents:
diff changeset
   775
case object Prd extends Val