lex_blex_Frankensteined.scala
author Chengsong
Wed, 10 Apr 2019 16:34:34 +0100 (2019-04-10)
changeset 11 9c1ca6d6e190
parent 5 622ddbb1223a
child 12 768b833d6230
permissions -rw-r--r--
The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis. This causes the exception. Two ways of fixing this: delete C(C) construct (easy way around) or amend retrieve code etc. since the C(C) construct is intended for decoding Pred, and we don't use pred now, we shall delete this. This is the last veersion that contains C(CHAR)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
Chengsong
parents:
diff changeset
     1
package RexpRelated
Chengsong
parents:
diff changeset
     2
import scala.language.implicitConversions    
Chengsong
parents:
diff changeset
     3
import scala.language.reflectiveCalls
Chengsong
parents:
diff changeset
     4
import scala.annotation.tailrec   
Chengsong
parents:
diff changeset
     5
import scala.util.Try
Chengsong
parents:
diff changeset
     6
Chengsong
parents:
diff changeset
     7
abstract class Bit
Chengsong
parents:
diff changeset
     8
case object Z extends Bit
Chengsong
parents:
diff changeset
     9
case object S extends Bit
Chengsong
parents:
diff changeset
    10
case class C(c: Char) extends Bit
Chengsong
parents:
diff changeset
    11
Chengsong
parents:
diff changeset
    12
Chengsong
parents:
diff changeset
    13
abstract class Rexp 
Chengsong
parents:
diff changeset
    14
case object ZERO extends Rexp
Chengsong
parents:
diff changeset
    15
case object ONE extends Rexp
Chengsong
parents:
diff changeset
    16
case class CHAR(c: Char) extends Rexp
Chengsong
parents:
diff changeset
    17
case class ALTS(rs: List[Rexp]) extends Rexp 
Chengsong
parents:
diff changeset
    18
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    19
case class STAR(r: Rexp) extends Rexp 
Chengsong
parents:
diff changeset
    20
case class RECD(x: String, r: Rexp) extends Rexp
Chengsong
parents:
diff changeset
    21
Chengsong
parents:
diff changeset
    22
Chengsong
parents:
diff changeset
    23
Chengsong
parents:
diff changeset
    24
object Rexp{
Chengsong
parents:
diff changeset
    25
  type Bits = List[Bit]
Chengsong
parents:
diff changeset
    26
  // abbreviations
Chengsong
parents:
diff changeset
    27
  type Mon = (Char, Rexp)
Chengsong
parents:
diff changeset
    28
  type Lin = Set[Mon]
Chengsong
parents:
diff changeset
    29
  def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2))
Chengsong
parents:
diff changeset
    30
  def PLUS(r: Rexp) = SEQ(r, STAR(r))
Chengsong
parents:
diff changeset
    31
  def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, List(r1, r2))
Chengsong
parents:
diff changeset
    32
Chengsong
parents:
diff changeset
    33
Chengsong
parents:
diff changeset
    34
  def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = xs match {
Chengsong
parents:
diff changeset
    35
    case Nil => Nil
Chengsong
parents:
diff changeset
    36
    case (x::xs) => {
Chengsong
parents:
diff changeset
    37
      val res = f(x)
Chengsong
parents:
diff changeset
    38
      if (acc.contains(res)) distinctBy(xs, f, acc)  
Chengsong
parents:
diff changeset
    39
      else x::distinctBy(xs, f, res::acc)
Chengsong
parents:
diff changeset
    40
    }
Chengsong
parents:
diff changeset
    41
  } 
Chengsong
parents:
diff changeset
    42
  // some convenience for typing in regular expressions
Chengsong
parents:
diff changeset
    43
  def charlist2rexp(s : List[Char]): Rexp = s match {
Chengsong
parents:
diff changeset
    44
    case Nil => ONE
Chengsong
parents:
diff changeset
    45
    case c::Nil => CHAR(c)
Chengsong
parents:
diff changeset
    46
    case c::s => SEQ(CHAR(c), charlist2rexp(s))
Chengsong
parents:
diff changeset
    47
  }
Chengsong
parents:
diff changeset
    48
  implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
Chengsong
parents:
diff changeset
    49
Chengsong
parents:
diff changeset
    50
  implicit def RexpOps(r: Rexp) = new {
Chengsong
parents:
diff changeset
    51
    def | (s: Rexp) = ALT(r, s)
Chengsong
parents:
diff changeset
    52
    def % = STAR(r)
Chengsong
parents:
diff changeset
    53
    def ~ (s: Rexp) = SEQ(r, s)
Chengsong
parents:
diff changeset
    54
  }
Chengsong
parents:
diff changeset
    55
Chengsong
parents:
diff changeset
    56
  implicit def stringOps(s: String) = new {
Chengsong
parents:
diff changeset
    57
    def | (r: Rexp) = ALT(s, r)
Chengsong
parents:
diff changeset
    58
    def | (r: String) = ALT(s, r)
Chengsong
parents:
diff changeset
    59
    def % = STAR(s)
Chengsong
parents:
diff changeset
    60
    def ~ (r: Rexp) = SEQ(s, r)
Chengsong
parents:
diff changeset
    61
    def ~ (r: String) = SEQ(s, r)
Chengsong
parents:
diff changeset
    62
    def $ (r: Rexp) = RECD(s, r)
Chengsong
parents:
diff changeset
    63
  }
Chengsong
parents:
diff changeset
    64
Chengsong
parents:
diff changeset
    65
  // translation into ARexps
Chengsong
parents:
diff changeset
    66
  def fuse(bs: Bits, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    67
    case AZERO => AZERO
Chengsong
parents:
diff changeset
    68
    case AONE(cs) => AONE(bs ++ cs)
Chengsong
parents:
diff changeset
    69
    case ACHAR(cs, f) => ACHAR(bs ++ cs, f)
Chengsong
parents:
diff changeset
    70
    case AALTS(cs, rs) => AALTS(bs ++ cs, rs)
Chengsong
parents:
diff changeset
    71
    case ASEQ(cs, r1, r2) => ASEQ(bs ++ cs, r1, r2)
Chengsong
parents:
diff changeset
    72
    case ASTAR(cs, r) => ASTAR(bs ++ cs, r)
Chengsong
parents:
diff changeset
    73
  }
Chengsong
parents:
diff changeset
    74
Chengsong
parents:
diff changeset
    75
  def internalise(r: Rexp) : ARexp = r match {
Chengsong
parents:
diff changeset
    76
    case ZERO => AZERO
Chengsong
parents:
diff changeset
    77
    case ONE => AONE(Nil)
Chengsong
parents:
diff changeset
    78
    case CHAR(c) => ACHAR(Nil, c)
Chengsong
parents:
diff changeset
    79
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
    80
      AALTS(Nil, List(fuse(List(Z), internalise(r1)), fuse(List(S), internalise(r2))))
Chengsong
parents:
diff changeset
    81
    case ALTS(r1::rs) => {
Chengsong
parents:
diff changeset
    82
      val AALTS(Nil, rs2) = internalise(ALTS(rs))
Chengsong
parents:
diff changeset
    83
      AALTS(Nil, fuse(List(Z), internalise(r1)) :: rs2.map(fuse(List(S), _)))
Chengsong
parents:
diff changeset
    84
    }
Chengsong
parents:
diff changeset
    85
    case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2))
Chengsong
parents:
diff changeset
    86
    case STAR(r) => ASTAR(Nil, internalise(r))
Chengsong
parents:
diff changeset
    87
    case RECD(x, r) => internalise(r)
Chengsong
parents:
diff changeset
    88
  }
Chengsong
parents:
diff changeset
    89
Chengsong
parents:
diff changeset
    90
  internalise(("a" | "ab") ~ ("b" | ""))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
    91
0
Chengsong
parents:
diff changeset
    92
  def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
Chengsong
parents:
diff changeset
    93
    case (ONE, bs) => (Empty, bs)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
    94
    case (CHAR(f), C(c)::bs) => (Chr(c), bs)
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
    95
    //case (ALTS(r::Nil), bs) => decode_aux(r, bs)//this case seems only used when we simp a regex before derivatives and it contains things like alt("a")
0
Chengsong
parents:
diff changeset
    96
    case (ALTS(rs), bs) => bs match {
Chengsong
parents:
diff changeset
    97
      case Z::bs1 => {
Chengsong
parents:
diff changeset
    98
        val (v, bs2) = decode_aux(rs.head, bs1)
Chengsong
parents:
diff changeset
    99
        (Left(v), bs2)
Chengsong
parents:
diff changeset
   100
      }
Chengsong
parents:
diff changeset
   101
      case S::bs1 => {
Chengsong
parents:
diff changeset
   102
        val (v, bs2) = decode_aux(ALTS(rs.tail), bs1)
Chengsong
parents:
diff changeset
   103
        (Right(v), bs2)			
Chengsong
parents:
diff changeset
   104
      }
Chengsong
parents:
diff changeset
   105
    }
Chengsong
parents:
diff changeset
   106
    case (SEQ(r1, r2), bs) => {
Chengsong
parents:
diff changeset
   107
      val (v1, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   108
      val (v2, bs2) = decode_aux(r2, bs1)
Chengsong
parents:
diff changeset
   109
      (Sequ(v1, v2), bs2)
Chengsong
parents:
diff changeset
   110
    }
Chengsong
parents:
diff changeset
   111
    case (STAR(r1), S::bs) => {
Chengsong
parents:
diff changeset
   112
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   113
      //println(v)
Chengsong
parents:
diff changeset
   114
      val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1)
Chengsong
parents:
diff changeset
   115
      (Stars(v::vs), bs2)
Chengsong
parents:
diff changeset
   116
    }
Chengsong
parents:
diff changeset
   117
    case (STAR(_), Z::bs) => (Stars(Nil), bs)
Chengsong
parents:
diff changeset
   118
    case (RECD(x, r1), bs) => {
Chengsong
parents:
diff changeset
   119
      val (v, bs1) = decode_aux(r1, bs)
Chengsong
parents:
diff changeset
   120
      (Rec(x, v), bs1)
Chengsong
parents:
diff changeset
   121
    }
Chengsong
parents:
diff changeset
   122
  }
Chengsong
parents:
diff changeset
   123
Chengsong
parents:
diff changeset
   124
  def decode(r: Rexp, bs: Bits) = decode_aux(r, bs) match {
Chengsong
parents:
diff changeset
   125
    case (v, Nil) => v
Chengsong
parents:
diff changeset
   126
    case _ => throw new Exception("Not decodable")
Chengsong
parents:
diff changeset
   127
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   128
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   129
  def code(v: Val): Bits = v match {
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   130
    case Empty => Nil
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   131
    case Left(v) => Z :: code(v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   132
    case Right(v) => S :: code(v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   133
    case Sequ(v1, v2) => code(v1) ::: code(v2)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   134
    case Stars(Nil) => Z::Nil
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   135
    case Stars(v::vs) => S::code(v) ::: code(Stars(vs))
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   136
  }
0
Chengsong
parents:
diff changeset
   137
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   138
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   139
  def retrieve(r: ARexp, v: Val): Bits = (r,v) match {
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   140
    case (AONE(bs), Empty) => bs
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   141
    case (ACHAR(bs, c), Chr(d)) => bs
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   142
    case (AALTS(bs, as), Left(v)) => bs ++ retrieve(as.head,v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   143
    case (AALTS(bs, as), Right(v)) => bs ++ retrieve(AALTS(Nil,as.tail),v)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   144
    case (ASEQ(bs, a1, a2), Sequ(v1, v2)) => bs ++ retrieve(a1, v1) ++ retrieve(a2, v2)
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   145
    case (ASTAR(bs, a), Stars(Nil)) => bs ++ List(Z) 
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   146
    case (ASTAR(bs, a), Stars(v::vs)) => bs ++ List(S) ++ retrieve(a, v) ++ retrieve(ASTAR(Nil, a), Stars(vs))
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   147
  }
0
Chengsong
parents:
diff changeset
   148
  //erase function: extracts the regx from Aregex
Chengsong
parents:
diff changeset
   149
  def erase(r:ARexp): Rexp = r match{
Chengsong
parents:
diff changeset
   150
    case AZERO => ZERO
Chengsong
parents:
diff changeset
   151
    case AONE(_) => ONE
Chengsong
parents:
diff changeset
   152
    case ACHAR(bs, f) => CHAR(f)
Chengsong
parents:
diff changeset
   153
    case AALTS(bs, rs) => ALTS(rs.map(erase(_)))
Chengsong
parents:
diff changeset
   154
    case ASEQ(bs, r1, r2) => SEQ (erase(r1), erase(r2))
Chengsong
parents:
diff changeset
   155
    case ASTAR(cs, r)=> STAR(erase(r))
Chengsong
parents:
diff changeset
   156
  }
Chengsong
parents:
diff changeset
   157
Chengsong
parents:
diff changeset
   158
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   159
  // nullable function: tests whether the regular 
Chengsong
parents:
diff changeset
   160
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   161
  def nullable (r: Rexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   162
    case ZERO => false
Chengsong
parents:
diff changeset
   163
    case ONE => true
Chengsong
parents:
diff changeset
   164
    case CHAR(_) => false
Chengsong
parents:
diff changeset
   165
    case ALTS(rs) => rs.exists(nullable)
Chengsong
parents:
diff changeset
   166
    case SEQ(r1, r2) => nullable(r1) && nullable(r2)
Chengsong
parents:
diff changeset
   167
    case STAR(_) => true
Chengsong
parents:
diff changeset
   168
    case RECD(_, r) => nullable(r)
Chengsong
parents:
diff changeset
   169
    //case PLUS(r) => nullable(r)
Chengsong
parents:
diff changeset
   170
  }
Chengsong
parents:
diff changeset
   171
Chengsong
parents:
diff changeset
   172
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   173
  def der (c: Char, r: Rexp) : Rexp = r match {
Chengsong
parents:
diff changeset
   174
    case ZERO => ZERO
Chengsong
parents:
diff changeset
   175
    case ONE => ZERO
Chengsong
parents:
diff changeset
   176
    case CHAR(f) => if (c == f) ONE else ZERO
Chengsong
parents:
diff changeset
   177
    case ALTS(List(r1, r2)) => ALTS(List(der(c, r1), der(c, r2)))
Chengsong
parents:
diff changeset
   178
    case SEQ(r1, r2) => 
Chengsong
parents:
diff changeset
   179
      if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
Chengsong
parents:
diff changeset
   180
      else SEQ(der(c, r1), r2)
Chengsong
parents:
diff changeset
   181
    case STAR(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   182
    case RECD(_, r1) => der(c, r1)
Chengsong
parents:
diff changeset
   183
    //case PLUS(r) => SEQ(der(c, r), STAR(r))
Chengsong
parents:
diff changeset
   184
  }
Chengsong
parents:
diff changeset
   185
Chengsong
parents:
diff changeset
   186
  def flatten(v: Val) : String = v match {
Chengsong
parents:
diff changeset
   187
    case Empty => ""
Chengsong
parents:
diff changeset
   188
    case Chr(c) => c.toString
Chengsong
parents:
diff changeset
   189
    case Left(v) => flatten(v)
Chengsong
parents:
diff changeset
   190
    case Right(v) => flatten(v)
Chengsong
parents:
diff changeset
   191
    case Sequ(v1, v2) => flatten(v1) + flatten(v2)
Chengsong
parents:
diff changeset
   192
    case Stars(vs) => vs.map(flatten).mkString
Chengsong
parents:
diff changeset
   193
    case Rec(_, v) => flatten(v)
Chengsong
parents:
diff changeset
   194
  }
Chengsong
parents:
diff changeset
   195
Chengsong
parents:
diff changeset
   196
  // extracts an environment from a value
Chengsong
parents:
diff changeset
   197
  def env(v: Val) : List[(String, String)] = v match {
Chengsong
parents:
diff changeset
   198
    case Empty => Nil
Chengsong
parents:
diff changeset
   199
    case Chr(c) => Nil
Chengsong
parents:
diff changeset
   200
    case Left(v) => env(v)
Chengsong
parents:
diff changeset
   201
    case Right(v) => env(v)
Chengsong
parents:
diff changeset
   202
    case Sequ(v1, v2) => env(v1) ::: env(v2)
Chengsong
parents:
diff changeset
   203
    case Stars(vs) => vs.flatMap(env)
Chengsong
parents:
diff changeset
   204
    case Rec(x, v) => (x, flatten(v))::env(v)
Chengsong
parents:
diff changeset
   205
  }
Chengsong
parents:
diff changeset
   206
Chengsong
parents:
diff changeset
   207
Chengsong
parents:
diff changeset
   208
  // injection part
Chengsong
parents:
diff changeset
   209
  def mkeps(r: Rexp) : Val = r match {
Chengsong
parents:
diff changeset
   210
    case ONE => Empty
Chengsong
parents:
diff changeset
   211
    case ALTS(List(r1, r2)) => 
Chengsong
parents:
diff changeset
   212
      if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2))
Chengsong
parents:
diff changeset
   213
    case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2))
Chengsong
parents:
diff changeset
   214
    case STAR(r) => Stars(Nil)
Chengsong
parents:
diff changeset
   215
    case RECD(x, r) => Rec(x, mkeps(r))
Chengsong
parents:
diff changeset
   216
    //case PLUS(r) => Stars(List(mkeps(r)))
Chengsong
parents:
diff changeset
   217
  }
Chengsong
parents:
diff changeset
   218
Chengsong
parents:
diff changeset
   219
  def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
Chengsong
parents:
diff changeset
   220
    case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   221
    case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   222
    case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2)
Chengsong
parents:
diff changeset
   223
    case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2))
Chengsong
parents:
diff changeset
   224
    case (ALTS(List(r1, r2)), Left(v1)) => Left(inj(r1, c, v1))
Chengsong
parents:
diff changeset
   225
    case (ALTS(List(r1, r2)), Right(v2)) => Right(inj(r2, c, v2))
Chengsong
parents:
diff changeset
   226
    case (CHAR(_), Empty) => Chr(c) 
Chengsong
parents:
diff changeset
   227
    case (RECD(x, r1), _) => Rec(x, inj(r1, c, v))
Chengsong
parents:
diff changeset
   228
    //case (PLUS(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs)
Chengsong
parents:
diff changeset
   229
  }
Chengsong
parents:
diff changeset
   230
  def lex(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   231
    case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   232
    case c::cs => inj(r, c, lex(der(c, r), cs))
Chengsong
parents:
diff changeset
   233
  }
Chengsong
parents:
diff changeset
   234
Chengsong
parents:
diff changeset
   235
  def lexing(r: Rexp, s: String) : Val = lex(r, s.toList)
Chengsong
parents:
diff changeset
   236
Chengsong
parents:
diff changeset
   237
  // some "rectification" functions for simplification
Chengsong
parents:
diff changeset
   238
  def F_ID(v: Val): Val = v
Chengsong
parents:
diff changeset
   239
  def F_RIGHT(f: Val => Val) = (v:Val) => Right(f(v))
Chengsong
parents:
diff changeset
   240
  def F_LEFT(f: Val => Val) = (v:Val) => Left(f(v))
Chengsong
parents:
diff changeset
   241
  def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   242
    case Right(v) => Right(f2(v))
Chengsong
parents:
diff changeset
   243
    case Left(v) => Left(f1(v))
Chengsong
parents:
diff changeset
   244
  }
Chengsong
parents:
diff changeset
   245
  def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   246
    case Sequ(v1, v2) => Sequ(f1(v1), f2(v2))
Chengsong
parents:
diff changeset
   247
  }
Chengsong
parents:
diff changeset
   248
  def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   249
    (v:Val) => Sequ(f1(Empty), f2(v))
Chengsong
parents:
diff changeset
   250
  def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = 
Chengsong
parents:
diff changeset
   251
    (v:Val) => Sequ(f1(v), f2(Empty))
Chengsong
parents:
diff changeset
   252
  def F_RECD(f: Val => Val) = (v:Val) => v match {
Chengsong
parents:
diff changeset
   253
    case Rec(x, v) => Rec(x, f(v))
Chengsong
parents:
diff changeset
   254
  }
Chengsong
parents:
diff changeset
   255
  def F_ERROR(v: Val): Val = throw new Exception("error")
Chengsong
parents:
diff changeset
   256
Chengsong
parents:
diff changeset
   257
  // simplification of regular expressions returning also an
Chengsong
parents:
diff changeset
   258
  // rectification function; no simplification under STAR 
Chengsong
parents:
diff changeset
   259
  def simp(r: Rexp): (Rexp, Val => Val) = r match {
Chengsong
parents:
diff changeset
   260
    case ALTS(List(r1, r2)) => {
Chengsong
parents:
diff changeset
   261
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   262
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   263
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   264
        case (ZERO, _) => (r2s, F_RIGHT(f2s))
Chengsong
parents:
diff changeset
   265
        case (_, ZERO) => (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   266
        case _ => if (r1s == r2s) (r1s, F_LEFT(f1s))
Chengsong
parents:
diff changeset
   267
                  else (ALTS(List(r1s, r2s)), F_ALT(f1s, f2s)) 
Chengsong
parents:
diff changeset
   268
      }
Chengsong
parents:
diff changeset
   269
    }
Chengsong
parents:
diff changeset
   270
    case SEQ(r1, r2) => {
Chengsong
parents:
diff changeset
   271
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   272
      val (r2s, f2s) = simp(r2)
Chengsong
parents:
diff changeset
   273
      (r1s, r2s) match {
Chengsong
parents:
diff changeset
   274
        case (ZERO, _) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   275
        case (_, ZERO) => (ZERO, F_ERROR)
Chengsong
parents:
diff changeset
   276
        case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s))
Chengsong
parents:
diff changeset
   277
        case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s))
Chengsong
parents:
diff changeset
   278
        case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s))
Chengsong
parents:
diff changeset
   279
      }
Chengsong
parents:
diff changeset
   280
    }
Chengsong
parents:
diff changeset
   281
    case RECD(x, r1) => {
Chengsong
parents:
diff changeset
   282
      val (r1s, f1s) = simp(r1)
Chengsong
parents:
diff changeset
   283
      (RECD(x, r1s), F_RECD(f1s))
Chengsong
parents:
diff changeset
   284
    }
Chengsong
parents:
diff changeset
   285
    case r => (r, F_ID)
Chengsong
parents:
diff changeset
   286
  }
Chengsong
parents:
diff changeset
   287
  /*
Chengsong
parents:
diff changeset
   288
  val each_simp_time = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   289
  val each_simp_timeb = scala.collection.mutable.ArrayBuffer.empty[Long]
Chengsong
parents:
diff changeset
   290
  */
Chengsong
parents:
diff changeset
   291
  def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
Chengsong
parents:
diff changeset
   292
    case Nil => {
Chengsong
parents:
diff changeset
   293
      if (nullable(r)) {
Chengsong
parents:
diff changeset
   294
        mkeps(r) 
Chengsong
parents:
diff changeset
   295
      }
Chengsong
parents:
diff changeset
   296
      else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   297
    }
Chengsong
parents:
diff changeset
   298
    case c::cs => {
Chengsong
parents:
diff changeset
   299
      val (r_simp, f_simp) = simp(der(c, r))
Chengsong
parents:
diff changeset
   300
      inj(r, c, f_simp(lex_simp(r_simp, cs)))
Chengsong
parents:
diff changeset
   301
    }
Chengsong
parents:
diff changeset
   302
  }
Chengsong
parents:
diff changeset
   303
Chengsong
parents:
diff changeset
   304
  def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList)
Chengsong
parents:
diff changeset
   305
Chengsong
parents:
diff changeset
   306
  //println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
Chengsong
parents:
diff changeset
   307
Chengsong
parents:
diff changeset
   308
  // filters out all white spaces
Chengsong
parents:
diff changeset
   309
  def tokenise(r: Rexp, s: String) = 
Chengsong
parents:
diff changeset
   310
    env(lexing_simp(r, s)).filterNot { _._1 == "w"}
Chengsong
parents:
diff changeset
   311
Chengsong
parents:
diff changeset
   312
Chengsong
parents:
diff changeset
   313
  //reads the string from a file 
Chengsong
parents:
diff changeset
   314
  def fromFile(name: String) : String = 
Chengsong
parents:
diff changeset
   315
    io.Source.fromFile(name).mkString
Chengsong
parents:
diff changeset
   316
Chengsong
parents:
diff changeset
   317
  def tokenise_file(r: Rexp, name: String) = 
Chengsong
parents:
diff changeset
   318
    tokenise(r, fromFile(name))
Chengsong
parents:
diff changeset
   319
  
Chengsong
parents:
diff changeset
   320
  //   Testing
Chengsong
parents:
diff changeset
   321
  //============
Chengsong
parents:
diff changeset
   322
Chengsong
parents:
diff changeset
   323
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   324
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   325
    val result = code
Chengsong
parents:
diff changeset
   326
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   327
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   328
    result
Chengsong
parents:
diff changeset
   329
  }
Chengsong
parents:
diff changeset
   330
Chengsong
parents:
diff changeset
   331
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART
Chengsong
parents:
diff changeset
   332
Chengsong
parents:
diff changeset
   333
  // bnullable function: tests whether the aregular 
Chengsong
parents:
diff changeset
   334
  // expression can recognise the empty string
Chengsong
parents:
diff changeset
   335
  def bnullable (r: ARexp) : Boolean = r match {
Chengsong
parents:
diff changeset
   336
    case AZERO => false
Chengsong
parents:
diff changeset
   337
    case AONE(_) => true
Chengsong
parents:
diff changeset
   338
    case ACHAR(_,_) => false
Chengsong
parents:
diff changeset
   339
    case AALTS(_, rs) => rs.exists(bnullable)
Chengsong
parents:
diff changeset
   340
    case ASEQ(_, r1, r2) => bnullable(r1) && bnullable(r2)
Chengsong
parents:
diff changeset
   341
    case ASTAR(_, _) => true
Chengsong
parents:
diff changeset
   342
  }
Chengsong
parents:
diff changeset
   343
Chengsong
parents:
diff changeset
   344
  def mkepsBC(r: ARexp) : Bits = r match {
Chengsong
parents:
diff changeset
   345
    case AONE(bs) => bs
Chengsong
parents:
diff changeset
   346
    case AALTS(bs, rs) => {
Chengsong
parents:
diff changeset
   347
      val n = rs.indexWhere(bnullable)
Chengsong
parents:
diff changeset
   348
      bs ++ mkepsBC(rs(n))
Chengsong
parents:
diff changeset
   349
    }
Chengsong
parents:
diff changeset
   350
    case ASEQ(bs, r1, r2) => bs ++ mkepsBC(r1) ++ mkepsBC(r2)
Chengsong
parents:
diff changeset
   351
    case ASTAR(bs, r) => bs ++ List(Z)
Chengsong
parents:
diff changeset
   352
  }
Chengsong
parents:
diff changeset
   353
Chengsong
parents:
diff changeset
   354
  // derivative of a regular expression w.r.t. a character
Chengsong
parents:
diff changeset
   355
  def bder(c: Char, r: ARexp) : ARexp = r match {
Chengsong
parents:
diff changeset
   356
    case AZERO => AZERO
Chengsong
parents:
diff changeset
   357
    case AONE(_) => AZERO
Chengsong
parents:
diff changeset
   358
    case ACHAR(bs, f) => if (c == f) AONE(bs:::List(C(c))) else AZERO
Chengsong
parents:
diff changeset
   359
    case AALTS(bs, rs) => AALTS(bs, rs.map(bder(c, _)))
Chengsong
parents:
diff changeset
   360
    case ASEQ(bs, r1, r2) => 
Chengsong
parents:
diff changeset
   361
      if (bnullable(r1)) AALT(bs, ASEQ(Nil, bder(c, r1), r2), fuse(mkepsBC(r1), bder(c, r2)))
Chengsong
parents:
diff changeset
   362
      else ASEQ(bs, bder(c, r1), r2)
Chengsong
parents:
diff changeset
   363
    case ASTAR(bs, r) => ASEQ(bs, fuse(List(S), bder(c, r)), ASTAR(Nil, r))
Chengsong
parents:
diff changeset
   364
  }
Chengsong
parents:
diff changeset
   365
Chengsong
parents:
diff changeset
   366
Chengsong
parents:
diff changeset
   367
  def ders (s: List[Char], r: Rexp) : Rexp = s match {
Chengsong
parents:
diff changeset
   368
    case Nil => r
Chengsong
parents:
diff changeset
   369
    case c::s => ders(s, der(c, r))
Chengsong
parents:
diff changeset
   370
  }
Chengsong
parents:
diff changeset
   371
Chengsong
parents:
diff changeset
   372
  // derivative w.r.t. a string (iterates bder)
Chengsong
parents:
diff changeset
   373
  @tailrec
Chengsong
parents:
diff changeset
   374
  def bders (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   375
    case Nil => r
Chengsong
parents:
diff changeset
   376
    case c::s => bders(s, bder(c, r))
Chengsong
parents:
diff changeset
   377
  }
Chengsong
parents:
diff changeset
   378
Chengsong
parents:
diff changeset
   379
  def flats(rs: List[ARexp]): List[ARexp] = rs match {
Chengsong
parents:
diff changeset
   380
      case Nil => Nil
Chengsong
parents:
diff changeset
   381
      case AZERO :: rs1 => flats(rs1)
Chengsong
parents:
diff changeset
   382
      case AALTS(bs, rs1) :: rs2 => rs1.map(fuse(bs, _)) ::: flats(rs2)
Chengsong
parents:
diff changeset
   383
      case r1 :: rs2 => r1 :: flats(rs2)
Chengsong
parents:
diff changeset
   384
    }
Chengsong
parents:
diff changeset
   385
  def rflats(rs: List[Rexp]): List[Rexp] = rs match {
Chengsong
parents:
diff changeset
   386
    case Nil => Nil
Chengsong
parents:
diff changeset
   387
    case ZERO :: rs1 => rflats(rs1)
Chengsong
parents:
diff changeset
   388
    case ALTS(rs1) :: rs2 => rs1 ::: rflats(rs2)
Chengsong
parents:
diff changeset
   389
    case r1 :: rs2 => r1 :: rflats(rs2)
Chengsong
parents:
diff changeset
   390
  }
Chengsong
parents:
diff changeset
   391
  var flats_time = 0L
Chengsong
parents:
diff changeset
   392
  var dist_time = 0L
Chengsong
parents:
diff changeset
   393
  
Chengsong
parents:
diff changeset
   394
  def bsimp(r: ARexp): ARexp = r match {
Chengsong
parents:
diff changeset
   395
    case ASEQ(bs1, r1, r2) => (bsimp(r1), bsimp(r2)) match {
Chengsong
parents:
diff changeset
   396
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   397
        case (_, AZERO) => AZERO
Chengsong
parents:
diff changeset
   398
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)
Chengsong
parents:
diff changeset
   399
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   400
    }
Chengsong
parents:
diff changeset
   401
    case AALTS(bs1, rs) => {
Chengsong
parents:
diff changeset
   402
      val rs_simp = rs.map(bsimp)
Chengsong
parents:
diff changeset
   403
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   404
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   405
      dist_res match {
Chengsong
parents:
diff changeset
   406
        case Nil => AZERO
Chengsong
parents:
diff changeset
   407
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   408
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   409
      }
Chengsong
parents:
diff changeset
   410
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   411
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   412
    case r => r
Chengsong
parents:
diff changeset
   413
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   414
  def super_bsimp(r: ARexp): ARexp = r match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   415
    case ASEQ(bs1, r1, r2) => (super_bsimp(r1), super_bsimp(r2)) match {
0
Chengsong
parents:
diff changeset
   416
        case (AZERO, _) => AZERO
Chengsong
parents:
diff changeset
   417
        case (_, AZERO) => AZERO
11
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   418
        case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s)//万一是(r1, alts(rs))这种形式呢
9c1ca6d6e190 The C(Char) construct is incompatible with the code and retrieve in Fahad's thesis.
Chengsong
parents: 5
diff changeset
   419
        case (AALTS(bs2, rs), r2) => AALTS(bs1 ++ bs2, rs.map(r => r match {case AONE(bs3) => fuse(bs3, r2) case r => ASEQ(Nil, r, r2)} ) ) 
0
Chengsong
parents:
diff changeset
   420
        case (r1s, r2s) => ASEQ(bs1, r1s, r2s)
Chengsong
parents:
diff changeset
   421
    }
Chengsong
parents:
diff changeset
   422
    case AALTS(bs1, rs) => {
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   423
      val rs_simp = rs.map(super_bsimp)
0
Chengsong
parents:
diff changeset
   424
      val flat_res = flats(rs_simp)
Chengsong
parents:
diff changeset
   425
      val dist_res = distinctBy(flat_res, erase)
Chengsong
parents:
diff changeset
   426
      dist_res match {
Chengsong
parents:
diff changeset
   427
        case Nil => AZERO
Chengsong
parents:
diff changeset
   428
        case s :: Nil => fuse(bs1, s)
Chengsong
parents:
diff changeset
   429
        case rs => AALTS(bs1, rs)  
Chengsong
parents:
diff changeset
   430
      }
Chengsong
parents:
diff changeset
   431
    }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   432
    //case ASTAR(bs, r) => ASTAR(bs, bsimp(r))
0
Chengsong
parents:
diff changeset
   433
    case r => r
Chengsong
parents:
diff changeset
   434
  }
Chengsong
parents:
diff changeset
   435
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   436
0
Chengsong
parents:
diff changeset
   437
  def simp_weakened(r: Rexp): Rexp = r match {
Chengsong
parents:
diff changeset
   438
    case SEQ(r1, r2) => (simp_weakened(r1), r2) match {
Chengsong
parents:
diff changeset
   439
        case (ZERO, _) => ZERO
Chengsong
parents:
diff changeset
   440
        case (_, ZERO) => ZERO
Chengsong
parents:
diff changeset
   441
        case (ONE, r2s) => r2s
Chengsong
parents:
diff changeset
   442
        case (r1s, r2s) => SEQ(r1s, r2s)
Chengsong
parents:
diff changeset
   443
    }
Chengsong
parents:
diff changeset
   444
    case ALTS(rs) => {
Chengsong
parents:
diff changeset
   445
      val rs_simp = rs.map(simp_weakened)
Chengsong
parents:
diff changeset
   446
      val flat_res = rflats(rs_simp)
Chengsong
parents:
diff changeset
   447
      val dist_res = rs_simp.distinct
Chengsong
parents:
diff changeset
   448
      dist_res match {
Chengsong
parents:
diff changeset
   449
        case Nil => ZERO
Chengsong
parents:
diff changeset
   450
        case s :: Nil => s
Chengsong
parents:
diff changeset
   451
        case rs => ALTS(rs)  
Chengsong
parents:
diff changeset
   452
      }
Chengsong
parents:
diff changeset
   453
    }
Chengsong
parents:
diff changeset
   454
    case STAR(r) => STAR(simp_weakened(r))
Chengsong
parents:
diff changeset
   455
    case r => r
Chengsong
parents:
diff changeset
   456
  }
Chengsong
parents:
diff changeset
   457
    
Chengsong
parents:
diff changeset
   458
  
Chengsong
parents:
diff changeset
   459
  //----------------------------------------------------------------------------experiment bsimp
Chengsong
parents:
diff changeset
   460
  /*
Chengsong
parents:
diff changeset
   461
  def bders_simp (s: List[Char], r: ARexp) : ARexp = s match {
Chengsong
parents:
diff changeset
   462
    case Nil => r
Chengsong
parents:
diff changeset
   463
    case c::s => bders_simp(s, bsimp(bder(c, r)))
Chengsong
parents:
diff changeset
   464
  }
Chengsong
parents:
diff changeset
   465
  */
Chengsong
parents:
diff changeset
   466
  /*
Chengsong
parents:
diff changeset
   467
  def time[T](code: => T) = {
Chengsong
parents:
diff changeset
   468
    val start = System.nanoTime()
Chengsong
parents:
diff changeset
   469
    val result = code
Chengsong
parents:
diff changeset
   470
    val end = System.nanoTime()
Chengsong
parents:
diff changeset
   471
    println((end - start)/1.0e9)
Chengsong
parents:
diff changeset
   472
    result
Chengsong
parents:
diff changeset
   473
  }
Chengsong
parents:
diff changeset
   474
  */
Chengsong
parents:
diff changeset
   475
  // main unsimplified lexing function (produces a value)
Chengsong
parents:
diff changeset
   476
  def blex(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   477
    case Nil => if (bnullable(r)) mkepsBC(r) else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   478
    case c::cs => {
Chengsong
parents:
diff changeset
   479
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   480
      blex(der_res, cs)
Chengsong
parents:
diff changeset
   481
    }
Chengsong
parents:
diff changeset
   482
  }
Chengsong
parents:
diff changeset
   483
Chengsong
parents:
diff changeset
   484
  def bpre_lexing(r: Rexp, s: String) = blex(internalise(r), s.toList)
Chengsong
parents:
diff changeset
   485
  //def blexing(r: Rexp, s: String) : Val = decode(r, blex(internalise(r), s.toList))
Chengsong
parents:
diff changeset
   486
Chengsong
parents:
diff changeset
   487
  var bder_time = 0L
Chengsong
parents:
diff changeset
   488
  var bsimp_time = 0L
Chengsong
parents:
diff changeset
   489
  var mkepsBC_time = 0L
Chengsong
parents:
diff changeset
   490
  var small_de = 2
Chengsong
parents:
diff changeset
   491
  var big_de = 5
Chengsong
parents:
diff changeset
   492
  var usual_de = 3
Chengsong
parents:
diff changeset
   493
  
Chengsong
parents:
diff changeset
   494
  def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
Chengsong
parents:
diff changeset
   495
    case Nil => {
Chengsong
parents:
diff changeset
   496
      if (bnullable(r)) {
Chengsong
parents:
diff changeset
   497
        //println(asize(r))
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   498
        mkepsBC(r)
0
Chengsong
parents:
diff changeset
   499
      }
Chengsong
parents:
diff changeset
   500
    else throw new Exception("Not matched")
Chengsong
parents:
diff changeset
   501
    }
Chengsong
parents:
diff changeset
   502
    case c::cs => {
Chengsong
parents:
diff changeset
   503
      val der_res = bder(c,r)
Chengsong
parents:
diff changeset
   504
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   505
      blex_simp(simp_res, cs)      
Chengsong
parents:
diff changeset
   506
    }
Chengsong
parents:
diff changeset
   507
  }
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   508
  def super_blex_simp(r: ARexp, s: List[Char]): Bits = s match {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   509
    case Nil => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   510
      if (bnullable(r)) {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   511
        mkepsBC(r)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   512
      }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   513
      else throw new Exception("Not matched")
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   514
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   515
    case c::cs => {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   516
      super_blex_simp(super_bsimp(bder(c,r)), cs)
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   517
    }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   518
  }
0
Chengsong
parents:
diff changeset
   519
  def blex_real_simp(r: ARexp, s: List[Char]): ARexp = s match{
Chengsong
parents:
diff changeset
   520
    case Nil => r
Chengsong
parents:
diff changeset
   521
    case c::cs => blex_real_simp(bsimp(bder(c, r)), cs)
Chengsong
parents:
diff changeset
   522
  }
Chengsong
parents:
diff changeset
   523
Chengsong
parents:
diff changeset
   524
Chengsong
parents:
diff changeset
   525
  //size: of a Aregx for testing purposes 
Chengsong
parents:
diff changeset
   526
  def size(r: Rexp) : Int = r match {
Chengsong
parents:
diff changeset
   527
    case ZERO => 1
Chengsong
parents:
diff changeset
   528
    case ONE => 1
Chengsong
parents:
diff changeset
   529
    case CHAR(_) => 1
Chengsong
parents:
diff changeset
   530
    case SEQ(r1, r2) => 1 + size(r1) + size(r2)
Chengsong
parents:
diff changeset
   531
    case ALTS(rs) => 1 + rs.map(size).sum
Chengsong
parents:
diff changeset
   532
    case STAR(r) => 1 + size(r)
Chengsong
parents:
diff changeset
   533
  }
Chengsong
parents:
diff changeset
   534
Chengsong
parents:
diff changeset
   535
  def asize(a: ARexp) = size(erase(a))
Chengsong
parents:
diff changeset
   536
Chengsong
parents:
diff changeset
   537
Chengsong
parents:
diff changeset
   538
  // decoding does not work yet
Chengsong
parents:
diff changeset
   539
  def blexing_simp(r: Rexp, s: String) = {
Chengsong
parents:
diff changeset
   540
    val bit_code = blex_simp(internalise(r), s.toList)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   541
    decode(r, bit_code) 
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   542
  }
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   543
  def super_blexing_simp(r: Rexp, s: String) = {
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   544
    decode(r, super_blex_simp(internalise(r), s.toList))
0
Chengsong
parents:
diff changeset
   545
  }
Chengsong
parents:
diff changeset
   546
Chengsong
parents:
diff changeset
   547
Chengsong
parents:
diff changeset
   548
Chengsong
parents:
diff changeset
   549
Chengsong
parents:
diff changeset
   550
Chengsong
parents:
diff changeset
   551
  // Lexing Rules for a Small While Language
Chengsong
parents:
diff changeset
   552
Chengsong
parents:
diff changeset
   553
  //symbols
Chengsong
parents:
diff changeset
   554
  /*
Chengsong
parents:
diff changeset
   555
  val SYM = PRED("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(_))
Chengsong
parents:
diff changeset
   556
  
Chengsong
parents:
diff changeset
   557
  //digits
Chengsong
parents:
diff changeset
   558
  val DIGIT = PRED("0123456789".contains(_))
Chengsong
parents:
diff changeset
   559
  //identifiers
Chengsong
parents:
diff changeset
   560
  val ID = SYM ~ (SYM | DIGIT).% 
Chengsong
parents:
diff changeset
   561
  //numbers
Chengsong
parents:
diff changeset
   562
  val NUM = STAR(DIGIT)
Chengsong
parents:
diff changeset
   563
  //keywords
Chengsong
parents:
diff changeset
   564
  val KEYWORD : Rexp = "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
Chengsong
parents:
diff changeset
   565
  val AKEYWORD: Rexp = ALTS(List("skip" , "while" , "do" , "if" , "then" , "else" , "read" , "write" , "true" , "false"))
Chengsong
parents:
diff changeset
   566
  //semicolons
Chengsong
parents:
diff changeset
   567
  val SEMI: Rexp = ";"
Chengsong
parents:
diff changeset
   568
  //operators
Chengsong
parents:
diff changeset
   569
  val OP: Rexp = ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
Chengsong
parents:
diff changeset
   570
  val AOP: Rexp = ALTS(List(":=" , "==" , "-" , "+" , "*" , "!=" , "<" , ">" , "<=" , ">=" , "%" , "/"))
Chengsong
parents:
diff changeset
   571
  //whitespaces
Chengsong
parents:
diff changeset
   572
  val WHITESPACE = PLUS(" " | "\n" | "\t")
Chengsong
parents:
diff changeset
   573
  //parentheses
Chengsong
parents:
diff changeset
   574
  val RPAREN: Rexp = ")"
Chengsong
parents:
diff changeset
   575
  val LPAREN: Rexp = "("
Chengsong
parents:
diff changeset
   576
  val BEGIN: Rexp = "{"
Chengsong
parents:
diff changeset
   577
  val END: Rexp = "}"
Chengsong
parents:
diff changeset
   578
  //strings...but probably needs not
Chengsong
parents:
diff changeset
   579
  val STRING: Rexp = "\"" ~ SYM.% ~ "\""
Chengsong
parents:
diff changeset
   580
Chengsong
parents:
diff changeset
   581
Chengsong
parents:
diff changeset
   582
Chengsong
parents:
diff changeset
   583
  val WHILE_REGS = (("k" $ KEYWORD) | 
Chengsong
parents:
diff changeset
   584
                    ("i" $ ID) | 
Chengsong
parents:
diff changeset
   585
                    ("o" $ OP) | 
Chengsong
parents:
diff changeset
   586
                    ("n" $ NUM) | 
Chengsong
parents:
diff changeset
   587
                    ("s" $ SEMI) | 
Chengsong
parents:
diff changeset
   588
                    ("str" $ STRING) |
Chengsong
parents:
diff changeset
   589
                    ("p" $ (LPAREN | RPAREN)) | 
Chengsong
parents:
diff changeset
   590
                    ("b" $ (BEGIN | END)) | 
Chengsong
parents:
diff changeset
   591
                    ("w" $ WHITESPACE)).%
Chengsong
parents:
diff changeset
   592
Chengsong
parents:
diff changeset
   593
  val AWHILE_REGS = (
Chengsong
parents:
diff changeset
   594
    ALTS(
Chengsong
parents:
diff changeset
   595
      List(
Chengsong
parents:
diff changeset
   596
        ("k" $ AKEYWORD), 
Chengsong
parents:
diff changeset
   597
                    ("i" $ ID),
Chengsong
parents:
diff changeset
   598
                    ("o" $ AOP) , 
Chengsong
parents:
diff changeset
   599
                    ("n" $ NUM) ,
Chengsong
parents:
diff changeset
   600
                    ("s" $ SEMI) ,
Chengsong
parents:
diff changeset
   601
                    ("str" $ STRING), 
Chengsong
parents:
diff changeset
   602
                    ("p" $ (LPAREN | RPAREN)), 
Chengsong
parents:
diff changeset
   603
                    ("b" $ (BEGIN | END)), 
Chengsong
parents:
diff changeset
   604
                    ("w" $ WHITESPACE)
Chengsong
parents:
diff changeset
   605
      )
Chengsong
parents:
diff changeset
   606
    )
Chengsong
parents:
diff changeset
   607
  ).%
Chengsong
parents:
diff changeset
   608
Chengsong
parents:
diff changeset
   609
*/
Chengsong
parents:
diff changeset
   610
Chengsong
parents:
diff changeset
   611
Chengsong
parents:
diff changeset
   612
  //--------------------------------------------------------------------------------------------------------START OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   613
  /*
Chengsong
parents:
diff changeset
   614
  // Two Simple While programs
Chengsong
parents:
diff changeset
   615
  //========================
Chengsong
parents:
diff changeset
   616
  println("prog0 test")
Chengsong
parents:
diff changeset
   617
Chengsong
parents:
diff changeset
   618
  val prog0 = """read n"""
Chengsong
parents:
diff changeset
   619
  println(env(lexing_simp(WHILE_REGS, prog0)))
Chengsong
parents:
diff changeset
   620
  println(tokenise(WHILE_REGS, prog0))
Chengsong
parents:
diff changeset
   621
Chengsong
parents:
diff changeset
   622
  println("prog1 test")
Chengsong
parents:
diff changeset
   623
Chengsong
parents:
diff changeset
   624
  val prog1 = """read  n; write (n)"""
Chengsong
parents:
diff changeset
   625
  println(tokenise(WHILE_REGS, prog1))
Chengsong
parents:
diff changeset
   626
Chengsong
parents:
diff changeset
   627
  */
Chengsong
parents:
diff changeset
   628
  // Bigger Tests
Chengsong
parents:
diff changeset
   629
  //==============
Chengsong
parents:
diff changeset
   630
Chengsong
parents:
diff changeset
   631
  def escape(raw: String): String = {
Chengsong
parents:
diff changeset
   632
    import scala.reflect.runtime.universe._
Chengsong
parents:
diff changeset
   633
    Literal(Constant(raw)).toString
Chengsong
parents:
diff changeset
   634
  }
Chengsong
parents:
diff changeset
   635
Chengsong
parents:
diff changeset
   636
  val prog2 = """
Chengsong
parents:
diff changeset
   637
  write "Fib";
Chengsong
parents:
diff changeset
   638
  read n;
Chengsong
parents:
diff changeset
   639
  minus1 := 0;
Chengsong
parents:
diff changeset
   640
  minus2 := 1;
Chengsong
parents:
diff changeset
   641
  while n > 0 do {
Chengsong
parents:
diff changeset
   642
    temp := minus2;
Chengsong
parents:
diff changeset
   643
    minus2 := minus1 + minus2;
Chengsong
parents:
diff changeset
   644
    minus1 := temp;
Chengsong
parents:
diff changeset
   645
    n := n - 1
Chengsong
parents:
diff changeset
   646
  };
Chengsong
parents:
diff changeset
   647
  write "Result";
Chengsong
parents:
diff changeset
   648
  write minus2
Chengsong
parents:
diff changeset
   649
  """
Chengsong
parents:
diff changeset
   650
Chengsong
parents:
diff changeset
   651
  val prog3 = """
Chengsong
parents:
diff changeset
   652
  start := 1000;
Chengsong
parents:
diff changeset
   653
  x := start;
Chengsong
parents:
diff changeset
   654
  y := start;
Chengsong
parents:
diff changeset
   655
  z := start;
Chengsong
parents:
diff changeset
   656
  while 0 < x do {
Chengsong
parents:
diff changeset
   657
  while 0 < y do {
Chengsong
parents:
diff changeset
   658
    while 0 < z do {
Chengsong
parents:
diff changeset
   659
      z := z - 1
Chengsong
parents:
diff changeset
   660
    };
Chengsong
parents:
diff changeset
   661
    z := start;
Chengsong
parents:
diff changeset
   662
    y := y - 1
Chengsong
parents:
diff changeset
   663
  };     
Chengsong
parents:
diff changeset
   664
  y := start;
Chengsong
parents:
diff changeset
   665
  x := x - 1
Chengsong
parents:
diff changeset
   666
  }
Chengsong
parents:
diff changeset
   667
  """
Chengsong
parents:
diff changeset
   668
  /*
Chengsong
parents:
diff changeset
   669
  for(i <- 400 to 400 by 1){
Chengsong
parents:
diff changeset
   670
    println(i+":")
Chengsong
parents:
diff changeset
   671
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   672
  } */
Chengsong
parents:
diff changeset
   673
Chengsong
parents:
diff changeset
   674
    /*
Chengsong
parents:
diff changeset
   675
    for (i <- 2 to 5){
Chengsong
parents:
diff changeset
   676
      for(j <- 1 to 3){
Chengsong
parents:
diff changeset
   677
        println(i,j)
Chengsong
parents:
diff changeset
   678
        small_de = i
Chengsong
parents:
diff changeset
   679
        usual_de = i + j
Chengsong
parents:
diff changeset
   680
        big_de = i + 2*j 
Chengsong
parents:
diff changeset
   681
        blexing_simp(AWHILE_REGS, prog2 * 100)
Chengsong
parents:
diff changeset
   682
      }
Chengsong
parents:
diff changeset
   683
    }*/
Chengsong
parents:
diff changeset
   684
Chengsong
parents:
diff changeset
   685
  /*
Chengsong
parents:
diff changeset
   686
  println("Tokens of prog2")
Chengsong
parents:
diff changeset
   687
  println(tokenise(WHILE_REGS, prog2).mkString("\n"))
Chengsong
parents:
diff changeset
   688
Chengsong
parents:
diff changeset
   689
  val fib_tokens = tokenise(WHILE_REGS, prog2)
Chengsong
parents:
diff changeset
   690
  fib_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   691
Chengsong
parents:
diff changeset
   692
Chengsong
parents:
diff changeset
   693
  val test_tokens = tokenise(WHILE_REGS, prog3)
Chengsong
parents:
diff changeset
   694
  test_tokens.map{case (s1, s2) => (escape(s1), escape(s2))}.mkString(",\n")
Chengsong
parents:
diff changeset
   695
  */
Chengsong
parents:
diff changeset
   696
Chengsong
parents:
diff changeset
   697
  /*
Chengsong
parents:
diff changeset
   698
  println("time test for blexing_simp")
Chengsong
parents:
diff changeset
   699
  for (i <- 1 to 1 by 1) {
Chengsong
parents:
diff changeset
   700
    lexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   701
    blexing_simp(WHILE_REGS, prog2 * i)
Chengsong
parents:
diff changeset
   702
    for( j <- 0 to each_simp_timeb.length - 1){
Chengsong
parents:
diff changeset
   703
      if( each_simp_timeb(j)/each_simp_time(j) >= 10.0 )
Chengsong
parents:
diff changeset
   704
        println(j, each_simp_timeb(j), each_simp_time(j))
Chengsong
parents:
diff changeset
   705
    }
Chengsong
parents:
diff changeset
   706
  }
Chengsong
parents:
diff changeset
   707
  */
Chengsong
parents:
diff changeset
   708
Chengsong
parents:
diff changeset
   709
Chengsong
parents:
diff changeset
   710
  //--------------------------------------------------------------------------------------------------------END OF NON-BITCODE PART (TESTING)
Chengsong
parents:
diff changeset
   711
Chengsong
parents:
diff changeset
   712
Chengsong
parents:
diff changeset
   713
Chengsong
parents:
diff changeset
   714
  def clear() = {
Chengsong
parents:
diff changeset
   715
    print("")
Chengsong
parents:
diff changeset
   716
    //print("\33[H\33[2J")
Chengsong
parents:
diff changeset
   717
  }
Chengsong
parents:
diff changeset
   718
Chengsong
parents:
diff changeset
   719
  //testing the two lexings produce the same value
Chengsong
parents:
diff changeset
   720
  //enumerates strings of length n over alphabet cs
Chengsong
parents:
diff changeset
   721
  def strs(n: Int, cs: String) : Set[String] = {
Chengsong
parents:
diff changeset
   722
    if (n == 0) Set("")
Chengsong
parents:
diff changeset
   723
    else {
Chengsong
parents:
diff changeset
   724
      val ss = strs(n - 1, cs)
Chengsong
parents:
diff changeset
   725
      ss ++
Chengsong
parents:
diff changeset
   726
      (for (s <- ss; c <- cs.toList) yield c + s)
Chengsong
parents:
diff changeset
   727
    }
Chengsong
parents:
diff changeset
   728
  }
Chengsong
parents:
diff changeset
   729
  def enum(n: Int, s: String) : Stream[Rexp] = n match {
Chengsong
parents:
diff changeset
   730
    case 0 => ZERO #:: ONE #:: s.toStream.map(CHAR)
Chengsong
parents:
diff changeset
   731
    case n => {  
Chengsong
parents:
diff changeset
   732
      val rs = enum(n - 1, s)
Chengsong
parents:
diff changeset
   733
      rs #:::
Chengsong
parents:
diff changeset
   734
      (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) #:::
Chengsong
parents:
diff changeset
   735
      (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) #:::
Chengsong
parents:
diff changeset
   736
      (for (r1 <- rs) yield STAR(r1))
Chengsong
parents:
diff changeset
   737
    }
Chengsong
parents:
diff changeset
   738
  }
Chengsong
parents:
diff changeset
   739
Chengsong
parents:
diff changeset
   740
  //tests blexing and lexing
Chengsong
parents:
diff changeset
   741
  def tests_blexer_simp(ss: Set[String])(r: Rexp) = {
Chengsong
parents:
diff changeset
   742
    clear()
Chengsong
parents:
diff changeset
   743
    //println(s"Testing ${r}")
Chengsong
parents:
diff changeset
   744
    for (s <- ss.par) yield {
Chengsong
parents:
diff changeset
   745
      val res1 = Try(Some(lexing_simp(r, s))).getOrElse(None)
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   746
      val res2 = Try(Some(super_blexing_simp(r, s))).getOrElse(None)
0
Chengsong
parents:
diff changeset
   747
      if (res1 != res2) println(s"Disagree on ${r} and ${s}")
Chengsong
parents:
diff changeset
   748
      if (res1 != res2) println(s"   ${res1} !=  ${res2}")
Chengsong
parents:
diff changeset
   749
      if (res1 != res2) Some((r, s)) else None
Chengsong
parents:
diff changeset
   750
    }
Chengsong
parents:
diff changeset
   751
  }
Chengsong
parents:
diff changeset
   752
Chengsong
parents:
diff changeset
   753
Chengsong
parents:
diff changeset
   754
5
622ddbb1223a correctness test with enumeration
Chengsong
parents: 3
diff changeset
   755
  
0
Chengsong
parents:
diff changeset
   756
  /*
Chengsong
parents:
diff changeset
   757
  def single_expression_explorer(ar: ARexp, ss: Set[String]): Unit = {
Chengsong
parents:
diff changeset
   758
    for (s <- ss){
Chengsong
parents:
diff changeset
   759
      
Chengsong
parents:
diff changeset
   760
      val der_res = bder(c, ar) 
Chengsong
parents:
diff changeset
   761
      val simp_res = bsimp(der_res)
Chengsong
parents:
diff changeset
   762
      println(asize(der_res))
Chengsong
parents:
diff changeset
   763
      println(asize(simp_res))
Chengsong
parents:
diff changeset
   764
      single_expression_explorer(simp_res, (sc - c))
Chengsong
parents:
diff changeset
   765
    }
Chengsong
parents:
diff changeset
   766
  }*/
Chengsong
parents:
diff changeset
   767
Chengsong
parents:
diff changeset
   768
  //single_expression_explorer(internalise(("c"~("a"+"b"))%) , Set('a','b','c'))
Chengsong
parents:
diff changeset
   769
Chengsong
parents:
diff changeset
   770
Chengsong
parents:
diff changeset
   771
}
Chengsong
parents:
diff changeset
   772
Chengsong
parents:
diff changeset
   773
import Rexp.Bits
Chengsong
parents:
diff changeset
   774
abstract class ARexp 
Chengsong
parents:
diff changeset
   775
case object AZERO extends ARexp
Chengsong
parents:
diff changeset
   776
case class AONE(bs: Bits) extends ARexp
Chengsong
parents:
diff changeset
   777
case class ACHAR(bs: Bits, f: Char) extends ARexp
Chengsong
parents:
diff changeset
   778
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp 
Chengsong
parents:
diff changeset
   779
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   780
case class ASTAR(bs: Bits, r: ARexp) extends ARexp 
Chengsong
parents:
diff changeset
   781
Chengsong
parents:
diff changeset
   782
Chengsong
parents:
diff changeset
   783
Chengsong
parents:
diff changeset
   784
abstract class Val
Chengsong
parents:
diff changeset
   785
case object Empty extends Val
Chengsong
parents:
diff changeset
   786
case class Chr(c: Char) extends Val
Chengsong
parents:
diff changeset
   787
case class Sequ(v1: Val, v2: Val) extends Val
Chengsong
parents:
diff changeset
   788
case class Left(v: Val) extends Val
Chengsong
parents:
diff changeset
   789
case class Right(v: Val) extends Val
Chengsong
parents:
diff changeset
   790
case class Stars(vs: List[Val]) extends Val
Chengsong
parents:
diff changeset
   791
case class Rec(x: String, v: Val) extends Val
Chengsong
parents:
diff changeset
   792
//case class Pos(i: Int, v: Val) extends Val
Chengsong
parents:
diff changeset
   793
case object Prd extends Val