| author | Christian Urban <urbanc@in.tum.de> | 
| Mon, 04 Feb 2019 12:29:23 +0000 | |
| changeset 300 | b7987014fed8 | 
| parent 299 | cae7eab03018 | 
| child 305 | 6e2cef17a9b3 | 
| permissions | -rw-r--r-- | 
| 298 | 1 | |
| 2 | import scala.language.implicitConversions | |
| 3 | import scala.language.reflectiveCalls | |
| 4 | import scala.annotation.tailrec | |
| 5 | import scala.util.Try | |
| 6 | ||
// Renders `raw` the way it would be written as a Scala string literal
// (surrounding quotes, special characters escaped), by printing a
// reflection `Literal` tree that wraps the string constant.
def escape(raw: String) : String = {
  import scala.reflect.runtime.universe.{Constant, Literal}
  val literal = Literal(Constant(raw))
  literal.toString
}
| 11 | ||
// Applies `escape` to both components of a pair.
def esc2(r: (String, String)) = r match {
  case (first, second) => (escape(first), escape(second))
}
| 13 | ||
// Keeps the first element of `xs` for every distinct key `f(x)`, preserving
// the original order of the survivors.
//
//   xs  - the list to filter
//   f   - key function; two elements count as duplicates when their keys are equal
//   acc - keys that are to be treated as already seen (defaults to none)
//
// The traversal is tail-recursive, so long lists do not exhaust the stack,
// and seen keys are kept in a Set so membership tests do not rescan a list.
// Keys are expected to have consistent equals/hashCode (true for case classes).
def distinctBy[B, C](xs: List[B], f: B => C, acc: List[C] = Nil): List[B] = {
  @tailrec
  def loop(rest: List[B], seen: Set[C], kept: List[B]): List[B] = rest match {
    case Nil => kept.reverse   // `kept` is built back to front
    case x :: tail =>
      val key = f(x)
      if (seen(key)) loop(tail, seen, kept)
      else loop(tail, seen + key, x :: kept)
  }
  loop(xs, acc.toSet, Nil)
}
| 22 | ||
// Bits used to record lexing decisions in the bit-coded part below:
//   Z / S  - choice markers (internalise tags the first alternative with Z
//            and the remaining ones with S; bder emits S for a further star
//            iteration and bmkeps emits Z to close a star)
//   C(c)   - the character consumed by a predicate (recorded by bder)
abstract class Bit
case object Z extends Bit
case object S extends Bit
case class C(c: Char) extends Bit

// a bit sequence
type Bits = List[Bit]
| 29 | ||
// Plain regular expressions:
//   ZERO        - matches nothing
//   ONE         - matches the empty string
//   PRED(f)     - matches a single character c for which f(c) holds
//   ALTS(rs)    - alternative over the list rs
//   SEQ(r1, r2) - r1 followed by r2
//   STAR(r)     - zero or more repetitions of r
//   RECD(x, r)  - r, with its match recorded under the label x
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class PRED(f: Char => Boolean) extends Rexp
case class ALTS(rs: List[Rexp]) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
case class RECD(x: String, r: Rexp) extends Rexp
| 39 | ||
| 40 | ||
// Abbreviations built from the core constructors:
//   CHAR(c)     - predicate accepting exactly the character c
//   ALT(r1, r2) - binary alternative
//   PLUS(r)     - one or more repetitions of r
def CHAR(c: Char) = PRED(ch => ch == c)
def ALT(r1: Rexp, r2: Rexp) = ALTS(r1 :: r2 :: Nil)
def PLUS(r: Rexp) = SEQ(r, STAR(r))
| 45 | ||
// Annotated regular expressions: the same shapes as Rexp (without RECD),
// where every constructor except AZERO additionally carries a bit sequence bs.
abstract class ARexp
case object AZERO extends ARexp
case class AONE(bs: Bits) extends ARexp
case class APRED(bs: Bits, f: Char => Boolean) extends ARexp
case class AALTS(bs: Bits, rs: List[ARexp]) extends ARexp
case class ASEQ(bs: Bits, r1: ARexp, r2: ARexp) extends ARexp
case class ASTAR(bs: Bits, r: ARexp) extends ARexp
| 54 | ||
// abbreviation: binary annotated alternative
def AALT(bs: Bits, r1: ARexp, r2: ARexp) = AALTS(bs, r1 :: r2 :: Nil)
| 57 | ||
// Values describing HOW a regular expression matched a string:
//   Empty        - the empty string
//   Chr(c)       - a single character
//   Sequ(v1, v2) - a sequence matched as v1 followed by v2
//   Left / Right - which side of a binary alternative matched
//   Stars(vs)    - the iterations of a star
//   Rec(x, v)    - the value v recorded under the label x
abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
| 67 | ||
| 68 | ||
| 69 | ||
// Convenience for typing in regular expressions.
//
// charlist2rexp turns a list of characters into the regular expression that
// matches exactly that string: ONE for the empty list, a single CHAR for one
// character, otherwise a right-nested SEQ of CHARs.
def charlist2rexp(s : List[Char]): Rexp =
  if (s.isEmpty) ONE
  else s.init.foldRight[Rexp](CHAR(s.last))((ch, rest) => SEQ(CHAR(ch), rest))

// lets a String be used wherever a Rexp is expected
implicit def string2rexp(s : String) : Rexp = {
  val chars = s.toList
  charlist2rexp(chars)
}
| 77 | ||
// Infix operators on regular expressions:
//   r | s   - alternative
//   r.%     - star
//   r ~ s   - sequence
//
// Declared as an implicit class rather than an implicit def returning an
// anonymous structural type, so the operators are ordinary statically
// dispatched methods instead of reflective calls.
implicit class RexpOps(r: Rexp) {
  def | (s: Rexp) = ALT(r, s)
  def % = STAR(r)
  def ~ (s: Rexp) = SEQ(r, s)
}
| 83 | ||
// Infix operators whose left operand is a String; the string is turned into
// a regular expression through the implicit string2rexp conversion:
//   s | r   - alternative (r may be a Rexp or a String)
//   s.%     - star
//   s ~ r   - sequence (r may be a Rexp or a String)
//   s $ r   - records the match of r under the label s
//
// Declared as an implicit class rather than an implicit def returning an
// anonymous structural type, so the operators are ordinary statically
// dispatched methods instead of reflective calls.
implicit class stringOps(s: String) {
  def | (r: Rexp) = ALT(s, r)
  def | (r: String) = ALT(s, r)
  def % = STAR(s)
  def ~ (r: Rexp) = SEQ(s, r)
  def ~ (r: String) = SEQ(s, r)
  def $ (r: Rexp) = RECD(s, r)
}
| 92 | ||
| 93 | ||
// Textual rendering of a regular expression - for testing purposes.
// Predicates are opaque functions and are therefore printed as "_".
def string(r: Rexp): String = r match {
  case ZERO => "0"
  case ONE => "1"
  case PRED(_) => "_"
  case ALTS(alts) => "[" + alts.map(string).mkString("|") + "]"
  case SEQ(first, second) => "(" + string(first) + " ~ " + string(second) + ")"
  case STAR(body) => "{" + string(body) + "}*"
  case RECD(label, body) => "(" + label + "! " + string(body) + ")"
}
| 104 | ||
| 298 | 105 | //-------------------------------------------------------------------------------------------------------- | 
| 106 | // START OF NON-BITCODE PART | |
| 107 | // | |
| 108 | ||
// nullable: true iff the regular expression can match the empty string
def nullable (r: Rexp) : Boolean = r match {
  case ZERO | PRED(_) => false
  case ONE | STAR(_) => true
  case ALTS(alts) => alts.exists(nullable)
  case SEQ(first, second) => nullable(first) && nullable(second)
  case RECD(_, body) => nullable(body)
}
| 120 | ||
// Derivative of a regular expression w.r.t. a character c: a regular
// expression matching exactly the strings s for which r matches c followed by s.
//
// Alternatives of any arity are handled (the derivative is taken of every
// alternative); for the two-element case this is the same result as before.
// Record labels are dropped by the derivative.
def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO => ZERO
  case ONE => ZERO
  case PRED(f) => if (f(c)) ONE else ZERO
  case ALTS(rs) => ALTS(rs.map(der(c, _)))
  case SEQ(r1, r2) =>
    // if r1 can match the empty string, c may also be consumed by r2
    if (nullable(r1)) ALTS(List(SEQ(der(c, r1), r2), der(c, r2)))
    else SEQ(der(c, r1), r2)
  case STAR(r1) => SEQ(der(c, r1), STAR(r1))
  case RECD(_, r1) => der(c, r1)
}
| 133 | ||
| 134 | ||
// Recovers the matched string from a value by concatenating its characters.
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(ch) => ch.toString
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
  case Rec(_, inner) => flatten(inner)
  case Sequ(first, second) => flatten(first) + flatten(second)
  case Stars(iterations) => iterations.map(flatten).mkString("")
}
| 144 | ||
// Extracts an environment from a value: one (label, matched text) pair for
// every Rec node, in left-to-right order, outer records before inner ones.
def env(v: Val) : List[(String, String)] = v match {
  case Empty | Chr(_) => Nil
  case Left(inner) => env(inner)
  case Right(inner) => env(inner)
  case Sequ(first, second) => env(first) ++ env(second)
  case Stars(iterations) => for (it <- iterations; binding <- env(it)) yield binding
  case Rec(label, inner) => (label, flatten(inner)) :: env(inner)
}
| 155 | ||
| 156 | ||
// Injection part.
//
// mkeps builds the value for how a nullable regular expression matches the
// empty string; in an alternative the left side is preferred when it is
// nullable. Only the shapes listed below are handled (binary alternatives
// only); anything else fails with a MatchError.
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case STAR(_) => Stars(Nil)
  case SEQ(first, second) => Sequ(mkeps(first), mkeps(second))
  case RECD(label, body) => Rec(label, mkeps(body))
  case ALTS(List(left, right)) =>
    if (nullable(left)) Left(mkeps(left))
    else Right(mkeps(right))
}
| 166 | ||
// Injects the character c back into a value v that was computed for the
// derivative der(c, r), yielding a value for r itself.
// The cases mirror the shapes produced by der; unexpected combinations
// fail with a MatchError.
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (PRED(_), Empty) => Chr(c)
  case (ALTS(List(left, _)), Left(lv)) => Left(inj(left, c, lv))
  case (ALTS(List(_, right)), Right(rv)) => Right(inj(right, c, rv))
  // sequence whose first part was not nullable
  case (SEQ(first, _), Sequ(v1, v2)) => Sequ(inj(first, c, v1), v2)
  // nullable first part, character consumed by the first part
  case (SEQ(first, _), Left(Sequ(v1, v2))) => Sequ(inj(first, c, v1), v2)
  // nullable first part, character consumed by the second part
  case (SEQ(first, second), Right(v2)) => Sequ(mkeps(first), inj(second, c, v2))
  case (STAR(body), Sequ(v1, Stars(iterations))) => Stars(inj(body, c, v1) :: iterations)
  case (RECD(label, body), _) => Rec(label, inj(body, c, v))
}
| 177 | ||
// Lexing without simplification: take derivatives character by character,
// build the value for the empty string at the end and inject the characters
// back on the way out. Throws Exception("Not matched") if r does not match s.
def lex(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (!nullable(r)) throw new Exception("Not matched")
    mkeps(r)
  } else {
    val c = s.head
    inj(r, c, lex(der(c, r), s.tail))
  }

// string front-end for lex
def lexing(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex(r, chars)
}
| 185 | ||
| 186 | //println(lexing(("ab" | "ab") ~ ("b" | ONE), "ab"))
 | |
| 187 | ||
// "Rectification" functions: each turns a value for a simplified regular
// expression back into a value for the unsimplified one (see simp).

// nothing was simplified
def F_ID(v: Val): Val = v

// only the right / left alternative survived
def F_RIGHT(f: Val => Val) = (value: Val) => Right(f(value))
def F_LEFT(f: Val => Val) = (value: Val) => Left(f(value))

// both alternatives survived; rectify inside whichever side matched
def F_ALT(f1: Val => Val, f2: Val => Val) = (value: Val) => value match {
  case Left(inner) => Left(f1(inner))
  case Right(inner) => Right(f2(inner))
}

// both parts of a sequence survived
def F_SEQ(f1: Val => Val, f2: Val => Val) = (value: Val) => value match {
  case Sequ(first, second) => Sequ(f1(first), f2(second))
}

// the first part of the sequence was simplified to ONE
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) =
  (value: Val) => Sequ(f1(Empty), f2(value))

// the second part of the sequence was simplified to ONE
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) =
  (value: Val) => Sequ(f1(value), f2(Empty))

// rectify underneath a record
def F_RECD(f: Val => Val) = (value: Val) => value match {
  case Rec(label, inner) => Rec(label, f(inner))
}

// used where the simplified expression is ZERO, so no value can ever arrive
def F_ERROR(v: Val): Val = throw new Exception("error")
 | |
| 207 | ||
// Simplification of regular expressions. Returns the simplified expression
// together with a rectification function that maps a value for the
// simplified expression back to a value for the original one.
// Only binary alternatives, sequences and records are simplified;
// there is no simplification under STAR.
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALTS(List(left, right)) =>
    val (ls, lf) = simp(left)
    val (rs, rf) = simp(right)
    if (ls == ZERO) (rs, F_RIGHT(rf))               // 0 + r  ==>  r
    else if (rs == ZERO) (ls, F_LEFT(lf))           // r + 0  ==>  r
    else if (ls == rs) (ls, F_LEFT(lf))             // r + r  ==>  r
    else (ALTS(List(ls, rs)), F_ALT(lf, rf))
  case SEQ(first, second) =>
    val (s1, f1) = simp(first)
    val (s2, f2) = simp(second)
    if (s1 == ZERO || s2 == ZERO) (ZERO, F_ERROR)   // 0 ~ r, r ~ 0  ==>  0
    else if (s1 == ONE) (s2, F_SEQ_Empty1(f1, f2))  // 1 ~ r  ==>  r
    else if (s2 == ONE) (s1, F_SEQ_Empty2(f1, f2))  // r ~ 1  ==>  r
    else (SEQ(s1, s2), F_SEQ(f1, f2))
  case RECD(label, body) =>
    val (bs, bf) = simp(body)
    (RECD(label, bs), F_RECD(bf))
  case other => (other, F_ID)
}
| 238 | ||
| 299 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
// Derivative w.r.t. a string, simplifying after every character
// (the rectification functions are discarded).
def ders_simp(s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r)((current, c) => simp(der(c, current))._1)
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 243 | |
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 244 | |
// Lexing with simplification: like lex, but every derivative is simplified
// and the value coming back is rectified before the character is injected.
// Throws Exception("Not matched") if r does not match s.
def lex_simp(r: Rexp, s: List[Char]) : Val =
  if (s.isEmpty) {
    if (!nullable(r)) throw new Exception("Not matched")
    mkeps(r)
  } else {
    val c = s.head
    val (simplified, rectify) = simp(der(c, r))
    inj(r, c, rectify(lex_simp(simplified, s.tail)))
  }
| 252 | ||
// string front-end for lex_simp
def lexing_simp(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex_simp(r, chars)
}

// small smoke test, printed when the script is loaded
println(lexing_simp(("a" | "ab") ~ ("b" | ""), "ab"))
| 256 | ||
| 257 | ||
// Lexes s with r (with simplification) and returns the recorded
// (label, text) pairs, both components escaped for printing.
def tokenise_simp(r: Rexp, s: String) =
  for (binding <- env(lexing_simp(r, s))) yield esc2(binding)
| 259 | ||
| 260 | //-------------------------------------------------------------------------------------------------------- | |
| 261 | // BITCODED PART | |
| 262 | ||
| 263 | ||
// Prepends the bits bs to the bit sequence stored at the top of r
// (AZERO carries no bits and is returned unchanged).
def fuse(bs: Bits, r: ARexp) : ARexp = r match {
  case AZERO => AZERO
  case AONE(own) => AONE(bs ::: own)
  case APRED(own, f) => APRED(bs ::: own, f)
  case AALTS(own, alts) => AALTS(bs ::: own, alts)
  case ASEQ(own, first, second) => ASEQ(bs ::: own, first, second)
  case ASTAR(own, body) => ASTAR(bs ::: own, body)
}
| 272 | ||
// Translation into ARexps. All bit sequences start out empty, except that
// in an alternative the first branch is tagged with Z and the rest with S
// (applied once per nesting level for more than two branches).
// Record labels are dropped. ALTS with fewer than two branches is not
// handled and fails with a MatchError.
def internalise(r: Rexp) : ARexp = r match {
  case ZERO => AZERO
  case ONE => AONE(Nil)
  case PRED(f) => APRED(Nil, f)
  case ALTS(List(left, right)) =>
    val taggedLeft = fuse(List(Z), internalise(left))
    val taggedRight = fuse(List(S), internalise(right))
    AALTS(Nil, List(taggedLeft, taggedRight))
  case ALTS(first :: others) =>
    internalise(ALTS(others)) match {
      case AALTS(Nil, tail) =>
        AALTS(Nil, fuse(List(Z), internalise(first)) :: tail.map(fuse(List(S), _)))
    }
  case SEQ(first, second) => ASEQ(Nil, internalise(first), internalise(second))
  case STAR(body) => ASTAR(Nil, internalise(body))
  case RECD(_, body) => internalise(body)
}

internalise(("a" | "ab") ~ ("b" | ""))
| 290 | ||
// Decoding of values from bit sequences: reads as many bits as the shape of
// r requires and returns the decoded value together with the unread bits.
// Bit sequences that do not fit r fail with a MatchError.
def decode_aux(r: Rexp, bs: Bits) : (Val, Bits) = (r, bs) match {
  case (ONE, rest) => (Empty, rest)
  case (PRED(_), C(ch)::rest) => (Chr(ch), rest)
  // a single remaining alternative needs no choice bit
  case (ALTS(only::Nil), rest) => decode_aux(only, rest)
  case (ALTS(alts), rest) => rest match {
    case Z::afterChoice =>
      val (value, remaining) = decode_aux(alts.head, afterChoice)
      (Left(value), remaining)
    case S::afterChoice =>
      val (value, remaining) = decode_aux(ALTS(alts.tail), afterChoice)
      (Right(value), remaining)
  }
  case (SEQ(first, second), rest) =>
    val (v1, afterFirst) = decode_aux(first, rest)
    val (v2, afterSecond) = decode_aux(second, afterFirst)
    (Sequ(v1, v2), afterSecond)
  // S: one more iteration follows; Z: end of the iterations
  case (STAR(body), S::rest) =>
    val (value, afterIteration) = decode_aux(body, rest)
    val (Stars(others), remaining) = decode_aux(STAR(body), afterIteration)
    (Stars(value::others), remaining)
  case (STAR(_), Z::rest) => (Stars(Nil), rest)
  case (RECD(label, body), rest) =>
    val (value, remaining) = decode_aux(body, rest)
    (Rec(label, value), remaining)
}
| 322 | ||
// Decodes a complete bit sequence; throws Exception("Not decodable")
// when bits are left over after decoding.
def decode(r: Rexp, bs: Bits) = {
  val (value, leftover) = decode_aux(r, bs)
  if (leftover.isEmpty) value
  else throw new Exception("Not decodable")
}
| 327 | ||
| 328 | ||
// erase: extracts the plain Rexp from an ARexp by dropping all bit sequences
def erase(r: ARexp) : Rexp = r match {
  case AZERO => ZERO
  case AONE(_) => ONE
  case APRED(_, f) => PRED(f)
  case AALTS(_, alts) => ALTS(alts.map(erase))
  case ASEQ(_, first, second) => SEQ(erase(first), erase(second))
  case ASTAR(_, body) => STAR(erase(body))
}
| 338 | ||
| 339 | ||
// bnullable: true iff the annotated regular expression can match
// the empty string
def bnullable (r: ARexp) : Boolean = r match {
  case AZERO | APRED(_, _) => false
  case AONE(_) | ASTAR(_, _) => true
  case AALTS(_, alts) => alts.exists(bnullable)
  case ASEQ(_, first, second) => bnullable(first) && bnullable(second)
}
| 350 | ||
// Bit sequence describing how a bnullable ARexp matches the empty string.
// In an alternative the first bnullable branch is used; a star contributes
// its own bits followed by Z (no iterations).
// Must only be called on bnullable expressions.
def bmkeps(r: ARexp) : Bits = r match {
  case AONE(bs) => bs
  case AALTS(bs, alts) =>
    val firstNullable = alts.indexWhere(bnullable)
    bs ::: bmkeps(alts(firstNullable))
  case ASEQ(bs, first, second) => bs ::: bmkeps(first) ::: bmkeps(second)
  case ASTAR(bs, _) => bs ::: List(Z)
}
| 360 | ||
// Derivative of an annotated regular expression w.r.t. a character.
// Besides computing the derivative it records, in the bit sequences,
// the matched character and the choices taken.
def bder(c: Char, r: ARexp) : ARexp = r match {
  case AZERO | AONE(_) => AZERO
  case APRED(bs, f) =>
    if (f(c)) AONE(bs ::: List(C(c)))   // remember the character that matched
    else AZERO
  case AALTS(bs, alts) => AALTS(bs, alts.map(bder(c, _)))
  case ASEQ(bs, first, second) =>
    if (bnullable(first))
      // first part may be skipped: its empty-string bits move onto the second branch
      AALT(bs, ASEQ(Nil, bder(c, first), second), fuse(bmkeps(first), bder(c, second)))
    else
      ASEQ(bs, bder(c, first), second)
  case ASTAR(bs, body) =>
    // S marks the start of a further iteration
    ASEQ(bs, fuse(List(S), bder(c, body)), ASTAR(Nil, body))
}
| 372 | ||
| 373 | ||
// derivative w.r.t. a string (applies bder for every character, left to right)
def bders (s: List[Char], r: ARexp) : ARexp =
  s.foldLeft(r)((current, c) => bder(c, current))
| 380 | ||
| 381 | ||
// Flattens a list of alternatives: AZERO entries are dropped and nested
// AALTS are spliced in, with their bits fused onto each of their branches.
def flats(rs: List[ARexp]): List[ARexp] = rs.flatMap {
  case AZERO => Nil
  case AALTS(bs, nested) => nested.map(fuse(bs, _))
  case other => List(other)
}
| 388 | ||
// Simplification of annotated regular expressions (sequences and
// alternatives only; nothing is simplified underneath a star).
def bsimp(r: ARexp): ARexp = r match {
  case ASEQ(bs1, first, second) =>
    val s1 = bsimp(first)
    val s2 = bsimp(second)
    if (s1 == AZERO || s2 == AZERO) AZERO
    else s1 match {
      // 1 ~ r  ==>  r, keeping the bits of the sequence and of the ONE
      case AONE(bs2) => fuse(bs1 ++ bs2, s2)
      case _ => ASEQ(bs1, s1, s2)
    }
  case AALTS(bs1, alts) =>
    // simplify the branches, flatten, and keep only the first of the
    // branches that erase to the same regular expression
    val remaining = distinctBy(flats(alts.map(bsimp)), erase)
    remaining match {
      case Nil => AZERO
      case single :: Nil => fuse(bs1, single)
      case several => AALTS(bs1, several)
    }
  //case ASTAR(bs1, r1) => ASTAR(bs1, bsimp(r1))
  case other => other
}
| 404 | ||
| 300 | 405 | |
| 406 | ||
// derivative w.r.t. a string, simplifying with bsimp after every character
def bders_simp (s: List[Char], r: ARexp) : ARexp =
  s.foldLeft(r)((current, c) => bsimp(bder(c, current)))
| 411 | ||
// Bit-coded lexing with simplification: returns the bits describing how r
// matches s; throws Exception("Not matched") if it does not match.
def blex_simp(r: ARexp, s: List[Char]) : Bits = s match {
  case c :: rest => blex_simp(bsimp(bder(c, r)), rest)
  case Nil =>
    if (!bnullable(r)) throw new Exception("Not matched")
    bmkeps(r)
}
| 417 | ||
| 418 | ||
// Lexes s with the bit-coded algorithm and decodes the resulting bits
// against r into a value.
def blexing_simp(r: Rexp, s: String) : Val = {
  val bits = blex_simp(internalise(r), s.toList)
  decode(r, bits)
}
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 421 | |
| 298 | 422 | |
// Bit-coded counterpart of tokenise_simp: recorded (label, text) pairs,
// both components escaped for printing.
def btokenise_simp(r: Rexp, s: String) =
  for (binding <- env(blexing_simp(r, s))) yield esc2(binding)
| 424 | ||
| 425 | ||
| 426 | ||
// INCLUDING SIMPLIFICATION UNDER STARS

// Same simplification as bsimp, except that the body of a star is
// simplified as well.
def bsimp_full(r: ARexp): ARexp = r match {
  case ASEQ(bs1, first, second) =>
    val s1 = bsimp_full(first)
    val s2 = bsimp_full(second)
    if (s1 == AZERO || s2 == AZERO) AZERO
    else s1 match {
      // 1 ~ r  ==>  r, keeping the bits of the sequence and of the ONE
      case AONE(bs2) => fuse(bs1 ++ bs2, s2)
      case _ => ASEQ(bs1, s1, s2)
    }
  case AALTS(bs1, alts) =>
    val remaining = distinctBy(flats(alts.map(bsimp_full)), erase)
    remaining match {
      case Nil => AZERO
      case single :: Nil => fuse(bs1, single)
      case several => AALTS(bs1, several)
    }
  case ASTAR(bs1, body) => ASTAR(bs1, bsimp_full(body))
  case other => other
}
| 444 | ||
// derivative w.r.t. a string, simplifying with bsimp_full after every character
def bders_simp_full(s: List[Char], r: ARexp) : ARexp =
  s.foldLeft(r)((current, c) => bsimp_full(bder(c, current)))
| 449 | ||
// Bit-coded lexing using bsimp_full; throws Exception("Not matched")
// if r does not match s.
def blex_simp_full(r: ARexp, s: List[Char]) : Bits = s match {
  case c :: rest => blex_simp_full(bsimp_full(bder(c, r)), rest)
  case Nil =>
    if (!bnullable(r)) throw new Exception("Not matched")
    bmkeps(r)
}
| 455 | ||
| 456 | ||
// Lexes s with the bit-coded algorithm (full simplification) and decodes
// the resulting bits against r into a value.
def blexing_simp_full(r: Rexp, s: String) : Val = {
  val bits = blex_simp_full(internalise(r), s.toList)
  decode(r, bits)
}
| 459 | ||
| 460 | ||
// Recorded (label, text) pairs obtained with blexing_simp_full,
// both components escaped for printing.
def btokenise_simp_full(r: Rexp, s: String) =
  for (binding <- env(blexing_simp_full(r, s))) yield esc2(binding)
| 462 | ||
| 463 | ||
| 299 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 464 | |
| 298 | 465 | // Testing | 
| 466 | //============ | |
| 467 | ||
// Evaluates `code` once and returns the elapsed wall-clock time in seconds,
// rendered as a String. The result of `code` itself is discarded.
def time[T](code: => T) = {
  val begin = System.nanoTime()
  val ignored = code
  val elapsedNanos = System.nanoTime() - begin
  (elapsedNanos / 1.0e9).toString
}
| 475 | ||
| 299 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
// Evaluates `code` eleven times in total (ten runs whose results are thrown
// away, then one whose result is kept) and returns that last result together
// with the elapsed time of all eleven runs in nanoseconds.
def timeR[T](code: => T) = {
  val begin = System.nanoTime()
  (1 to 10).foreach(_ => code)
  val result = code
  val finish = System.nanoTime()
  (result, finish - begin)
}
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 483 | |
// size of a regular expression (number of constructor nodes; a record
// label itself is not counted) - for testing purposes
def size(r: Rexp) : Int = r match {
  case ZERO | ONE | PRED(_) => 1
  case STAR(body) => size(body) + 1
  case SEQ(first, second) => size(first) + size(second) + 1
  case ALTS(alts) => alts.map(size).sum + 1
  case RECD(_, body) => size(body)
}

// size of an annotated regular expression, measured on its erasure
def asize(a: ARexp) = {
  val plain = erase(a)
  size(plain)
}
| 496 | ||
| 497 | ||
// Lexing Rules for a Small While Language

// symbols (letters)
val SYM = PRED(c => "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(c))
// digits
val DIGIT = PRED(c => "0123456789".contains(c))
// identifiers: a letter followed by letters or digits
val ID = SYM ~ (SYM | DIGIT).%
// numbers
val NUM = STAR(DIGIT)
// keywords
val KEYWORD : Rexp =
  "skip" | "while" | "do" | "if" | "then" | "else" | "read" | "write" | "true" | "false"
// semicolons
val SEMI: Rexp = ";"
// operators
val OP: Rexp =
  ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
// whitespaces
val WHITESPACE = PLUS(" " | "\n" | "\t")
// parentheses and braces
val RPAREN: Rexp = ")"
val LPAREN: Rexp = "("
val BEGIN: Rexp = "{"
val END: Rexp = "}"
// strings: letters enclosed in double quotes
val STRING: Rexp = "\"" ~ SYM.% ~ "\""

// the complete lexer: any sequence of the labelled token classes
val WHILE_REGS = (("k" $ KEYWORD) |
                  ("i" $ ID) |
                  ("o" $ OP) |
                  ("n" $ NUM) |
                  ("s" $ SEMI) |
                  ("str" $ STRING) |
                  ("p" $ (LPAREN | RPAREN)) |
                  ("b" $ (BEGIN | END)) |
                  ("w" $ WHITESPACE)).%
| 535 | ||
| 536 | ||
| 537 | // Some Small Tests | |
| 538 | //================== | |
| 539 | ||
| 540 | /* | |
| 541 | println("simple tests:")
 | |
| 542 | println(blexing_simp((SYM.%), "abcd")) | |
| 543 | println(blexing_simp(((SYM.%) | ((SYM.% | NUM).%)), "12345")) | |
| 544 | println(blexing_simp((WHILE_REGS), "abcd")) | |
| 545 | println(blexing_simp((WHILE_REGS), "12345")) | |
| 546 | println(blexing_simp((WHILE_REGS), """write "Fib";""")) | |
| 547 | */ | |
| 548 | ||
| 549 | ||
| 550 | // Bigger Tests | |
| 551 | //============== | |
| 552 | ||
| 553 | ||
| 554 | val fib_prog = """ | |
| 555 | write "Fib"; | |
| 556 | read n; | |
| 557 | minus1 := 0; | |
| 558 | minus2 := 1; | |
| 559 | while n > 0 do {
 | |
| 560 | temp := minus2; | |
| 561 | minus2 := minus1 + minus2; | |
| 562 | minus1 := temp; | |
| 563 | n := n - 1 | |
| 564 | }; | |
| 565 | write "Result"; | |
| 566 | write minus2 | |
| 567 | """ | |
| 568 | ||
| 569 | ||
// tokenise the Fibonacci program with both lexers and compare the results
println("fib prog tests :")
val fibTokensOld = tokenise_simp(WHILE_REGS, fib_prog)
println(fibTokensOld)
val fibTokensBit = btokenise_simp(WHILE_REGS, fib_prog)
println(fibTokensBit)
println("equal? " + (fibTokensOld == fibTokensBit))
| 574 | ||
// time the three tokenisers on 1 to 20 concatenated copies of the program
(1 to 20).foreach { i =>
  print(s"Old: ${time(tokenise_simp(WHILE_REGS, fib_prog * i))}")
  print(s" Bit: ${time(btokenise_simp(WHILE_REGS, fib_prog * i))}")
  println(s" Bit full simp: ${time(btokenise_simp_full(WHILE_REGS, fib_prog * i))}")
}
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 580 | |
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
// sizes of the lexer regex and of its derivatives w.r.t. ten copies of
// the program, for the three derivative functions
println(s"Original ${size(WHILE_REGS)}")
println(s"Size Bit  ${asize(bders_simp((fib_prog * 10).toList, internalise(WHILE_REGS)))}")
println(s"Size Bitf ${asize(bders_simp_full((fib_prog * 10).toList, internalise(WHILE_REGS)))}")
println(s"Size Old  ${size(ders_simp((fib_prog * 10).toList, WHILE_REGS))}")


//System.exit(0)

println("Internal sizes test OK or strange")
 | 
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 590 | |
// Relative difference of p1 w.r.t. p2 as a percentage, rounded to a whole
// number, right-aligned in 5 columns and followed by "%".
def perc(p1: Double, p2: Double) : String = {
  val percent = ((p1 - p2) / p2) * 100.0
  f"$percent%5.0f%%"
}
| 593 | ||
| 299 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
// Walks through the input s character by character, advancing the plain
// regular expression r (der + simp) and the annotated one a (bder + bsimp)
// in lock-step, and reports every step at which the plain version was
// faster than the bit-coded one.
//
//   n - number of the current step (used in the reports)
//   s - remaining input
//   r - current plain regular expression
//   a - current annotated regular expression
//
// Returns the final pair of regular expressions once the input is used up.
// At steps 1734 and 1749 the annotated expressions are dumped before and
// after the derivative and after both kinds of simplification; the full
// simplification is only computed at those two steps, the only place where
// its result is shown.
def ders_test(n: Int, s: List[Char], r: Rexp, a: ARexp) : (Rexp, ARexp) = s match {
  case Nil => (r, a)
  case c :: rest =>
    // derivative
    val (rd1, tr1) = timeR(der(c, r))
    val (ad1, ta1) = timeR(bder(c, a))
    if (tr1 < ta1)
      println(s"Time strange der  (step) ${n} ${perc(ta1, tr1)} sizes  der ${size(rd1)} ${asize(ad1)}")
    // simplification
    val (rd, tr) = timeR(simp(rd1)._1)
    val (ad, ta) = timeR(bsimp(ad1))
    if (tr < ta)
      println(s"Time strange simp (step) ${n} ${perc(ta, tr)} sizes simp ${size(rd)} ${asize(ad)}")
    if (n == 1749 || n == 1734) {
      // full simplification, only needed for this dump
      val adf = bsimp_full(ad1)
      println(s"Aregex before bder (size: ${asize(a)})\n ${string(erase(a))}")
      println(s"Aregex after bder (size: ${asize(ad1)})\n ${string(erase(ad1))}")
      println(s"Aregex after bsimp (size: ${asize(ad)})\n ${string(erase(ad))}")
      println(s"Aregex after bsimp_full (size: ${asize(adf)})\n ${string(erase(adf))}")
    }
    ders_test(n + 1, rest, rd, ad)
}
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 620 | |
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
// run the step-by-step comparison on ten copies of the Fibonacci program
val prg = (fib_prog * 10).toList
ders_test(0, prg, WHILE_REGS, internalise(WHILE_REGS))
| 298 | 623 | |
| 624 | ||
// testing that the two lexings produce the same value

// enumerates all strings of length up to n over the alphabet cs
// (the empty string is always included)
def strs(n: Int, cs: String) : Set[String] =
  if (n == 0) Set("")
  else {
    val shorter = strs(n - 1, cs)
    val extended = shorter.flatMap(s => cs.toList.map(c => s"$c$s"))
    shorter ++ extended
  }
// Lazily enumerates regular expressions over the characters of s:
// level 0 consists of ZERO, ONE and the single characters; level n consists
// of everything from level n - 1 followed by all binary alternatives, all
// sequences and all stars built from level n - 1.
def enum(n: Int, s: String) : Stream[Rexp] =
  if (n == 0) ZERO #:: ONE #:: s.toStream.map(CHAR)
  else {
    val smaller = enum(n - 1, s)
    // defs keep the later parts of the stream unevaluated until reached
    def alternatives = for (r1 <- smaller; r2 <- smaller) yield ALT(r1, r2)
    def sequences = for (r1 <- smaller; r2 <- smaller) yield SEQ(r1, r2)
    def stars = for (r1 <- smaller) yield STAR(r1)
    smaller #::: alternatives #::: sequences #::: stars
  }
| 645 | ||
// Compares lexing_simp and blexing_simp on every string in ss (in parallel).
// A lexer that throws counts as producing None. For every string yields
// Some((r, s)) if the two results differ (also printing the disagreement)
// and None if they agree.
def tests(ss: Set[String])(r: Rexp) = {
  //println(s"Testing ${r}")
  ss.par.map { s =>
    val res1 = Try(lexing_simp(r, s)).toOption
    val res2 = Try(blexing_simp(r, s)).toOption
    if (res1 == res2) None
    else {
      println(s"Disagree on ${r} and ${s}")
      println(s"   ${res1} !=  ${res2}")
      Some((r, s))
    }
  }
}
| 658 | ||
| 659 | ||
// compare both lexers on all regular expressions of level 2 over "abc"
// and all strings up to length 3 over "abc"
println("Partial searching: ")
val searchInputs = strs(3, "abc")
enum(2, "abc").map(r => tests(searchInputs)(r)).toSet
| 299 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 662 | |
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 663 | |
| 
cae7eab03018
added some timing and size tests when doing the derivatives
 Christian Urban <urbanc@in.tum.de> parents: 
298diff
changeset | 664 |