| author | Christian Urban <urbanc@in.tum.de> | 
| Sat, 05 May 2018 10:31:00 +0100 | |
| changeset 549 | 6f53ef9a9b21 | 
| parent 542 | 5f3b1e94da2c | 
| child 550 | a62357075346 | 
| permissions | -rw-r--r-- | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 1 | import scala.language.implicitConversions | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 4 | abstract class Rexp | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 5 | case object ZERO extends Rexp | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 6 | case object ONE extends Rexp | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | case class CHAR(c: Char) extends Rexp | 
| 549 | 8 | case class ALTS(rs: List[Rexp]) extends Rexp | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 9 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 10 | case class STAR(r: Rexp) extends Rexp | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | |
| 549 | 12 | // ALT is now an abbreviation | 
| 13 | def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2)) | |
| 14 | ||
| 15 | // proj is now used instead for Left and Right | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 16 | abstract class Val | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 17 | case object Empty extends Val | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 18 | case class Chr(c: Char) extends Val | 
| 520 | 19 | case class Sequ(v1: Val, v2: Val) extends Val | 
| 549 | 20 | case class Proj(n: Int, v: Val) extends Val | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 21 | case class Stars(vs: List[Val]) extends Val | 
| 549 | 22 | case class Rec(s: String, v: Val) extends Val | 
| 23 | ||
| 24 | // for manipulating projections | |
| 25 | def IncProj(m: Int, v: Val) = v match {
 | |
| 26 | case Proj(n, v) => Proj(n + m, v) | |
| 27 | } | |
| 28 | ||
| 29 | def DecProj(m: Int, v: Val) = v match {
 | |
| 30 | case Proj(n, v) => Proj(n - m, v) | |
| 31 | } | |
| 32 | ||
| 33 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 34 | // some convenience for typing in regular expressions | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 35 | def charlist2rexp(s : List[Char]): Rexp = s match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 36 | case Nil => ONE | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | case c::Nil => CHAR(c) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 | implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | implicit def RexpOps(r: Rexp) = new {
 | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | def | (s: Rexp) = ALT(r, s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | def % = STAR(r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | def ~ (s: Rexp) = SEQ(r, s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | implicit def stringOps(s: String) = new {
 | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | def | (r: Rexp) = ALT(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | def | (r: String) = ALT(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | def % = STAR(s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | def ~ (r: Rexp) = SEQ(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | def ~ (r: String) = SEQ(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | |
| 549 | 56 | // string of a regular expression - for testing purposes | 
| 57 | def string(r: Rexp) : String = r match {
 | |
| 58 | case ZERO => "0" | |
| 59 | case ONE => "1" | |
| 60 | case CHAR(c) => c.toString | |
| 61 |   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
 | |
| 62 |   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
 | |
| 63 |   case SEQ(ONE, CHAR(c)) => s"1${c}"
 | |
| 64 |   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
 | |
| 65 |   case STAR(r) => s"(${string(r)})*"
 | |
| 66 | } | |
| 67 | ||
| 68 | // size of a regular expression - for testing purposes | |
| 69 | def size(r: Rexp) : Int = r match {
 | |
| 70 | case ZERO => 1 | |
| 71 | case ONE => 1 | |
| 72 | case CHAR(_) => 1 | |
| 73 | case ALTS(rs) => 1 + rs.map(size).sum | |
| 74 | case SEQ(r1, r2) => 1 + size(r1) + size(r2) | |
| 75 | case STAR(r) => 1 + size(r) | |
| 76 | } | |
| 77 | ||
| 78 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 79 | // nullable function: tests whether the regular | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 80 | // expression can recognise the empty string | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | def nullable (r: Rexp) : Boolean = r match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 82 | case ZERO => false | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 83 | case ONE => true | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | case CHAR(_) => false | 
| 549 | 85 | case ALTS(rs) => rs.exists(nullable) | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 | case SEQ(r1, r2) => nullable(r1) && nullable(r2) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | case STAR(_) => true | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 90 | // derivative of a regular expression w.r.t. a character | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | def der (c: Char, r: Rexp) : Rexp = r match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 92 | case ZERO => ZERO | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 93 | case ONE => ZERO | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 94 | case CHAR(d) => if (c == d) ONE else ZERO | 
| 549 | 95 | case ALTS(rs) => ALTS(rs.map(der(c, _))) | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | case SEQ(r1, r2) => | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | else SEQ(der(c, r1), r2) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | case STAR(r) => SEQ(der(c, r), STAR(r)) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 100 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 101 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 102 | // derivative w.r.t. a string (iterates der) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 103 | def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 104 | case Nil => r | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 105 | case c::s => ders(s, der(c, r)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 106 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 107 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 108 | // extracts a string from value | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 109 | def flatten(v: Val) : String = v match {
 | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 110 | case Empty => "" | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 111 | case Chr(c) => c.toString | 
| 549 | 112 | case Proj(_, v) => flatten(v) | 
| 542 | 113 | case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 114 | case Stars(vs) => vs.map(flatten).mkString | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 115 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 116 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 117 | |
| 549 | 118 | // mkeps | 
| 119 | def mkeps(r: Rexp) : Val = r match {
 | |
| 120 | case ONE => Empty | |
| 121 | case ALTS(r1::rs) => | |
| 122 | if (nullable(r1)) Proj(0, mkeps(r1)) | |
| 123 | else IncProj(1, mkeps(ALTS(rs))) | |
| 124 | case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) | |
| 125 | case STAR(r) => Stars(Nil) | |
| 126 | } | |
| 127 | ||
| 128 | // injection | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 129 | def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
 | 
| 520 | 130 | case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) | 
| 131 | case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) | |
| 549 | 132 | case (SEQ(r1, r2), Proj(0, Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) | 
| 133 | case (SEQ(r1, r2), Proj(1, v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) | |
| 134 | case (ALTS(rs), Proj(n, v1)) => Proj(n, inj(rs(n), c, v1)) | |
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 135 | case (CHAR(d), Empty) => Chr(c) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 136 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 137 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 138 | // main lexing function (produces a value) | 
| 549 | 139 | // - does not simplify | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 140 | def lex(r: Rexp, s: List[Char]) : Val = s match {
 | 
| 549 | 141 |   case Nil => {
 | 
| 142 |     //println(s"Size of the last regex: ${size(r)}")
 | |
| 143 |     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
 | |
| 144 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 145 | case c::cs => inj(r, c, lex(der(c, r), cs)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 146 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 147 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 148 | def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 149 | |
| 549 | 150 | lexing("a" | ZERO, "a")
 | 
| 151 | lexing(ZERO | "a", "a") | |
| 450 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 152 | |
| 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 153 | lexing(("ab" | "a") ~ ("b" | ONE), "ab")
 | 
| 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 154 | |
| 549 | 155 | |
| 156 | // removing duplicate regular expressions | |
| 157 | val unit = (ZERO, F_ERROR(_)) | |
| 158 | ||
| 159 | def dups2(xs: List[(Rexp, Val => Val)], | |
| 160 | 	 acc: List[(Rexp, Val => Val)] = Nil) : List[(Rexp, Val => Val)] = xs match {
 | |
| 161 | case Nil => acc | |
| 162 | case (x, y)::xs => | |
| 163 | if (acc.map(_._1).contains(x)) dups2(xs, acc :+ unit) | |
| 164 | else dups2(xs, acc :+ (x, y)) | |
| 165 | } | |
| 166 | ||
| 167 | def dups(xs: List[(Rexp, Val => Val)]) : List[(Rexp, Val => Val)] = {
 | |
| 168 | val out = dups2(xs) | |
| 169 |   //if (out != xs) {
 | |
| 170 | // println() | |
| 171 |   //  println(s"Input ${string(ALTS(xs.map(_._1)))}")
 | |
| 172 |   //  println(s"Ouput ${string(ALTS(out.map(_._1)))}")
 | |
| 173 | //} | |
| 174 | out | |
| 175 | } | |
| 176 | ||
| 177 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 178 | // some "rectification" functions for simplification | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 179 | def F_ID(v: Val): Val = v | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 180 | def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
| 520 | 181 | case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 182 | } | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 183 | def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = | 
| 520 | 184 | (v:Val) => Sequ(f1(Empty), f2(v)) | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 185 | def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = | 
| 520 | 186 | (v:Val) => Sequ(f1(v), f2(Empty)) | 
| 549 | 187 | def F_ERROR(v: Val): Val = throw new Exception("error")
 | 
| 188 | def F_PRINT(v: Val): Val = {
 | |
| 189 |   println(s"value is ${v}")
 | |
| 190 |   throw new Exception("caught error")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 191 | } | 
| 549 | 192 | |
| 193 | def flats(rs: List[Rexp], seen: Set[Rexp]) : (List[Rexp], Val => Val) = {
 | |
| 194 |   //println(s"I am flats: ${string(ALTS(rs))}")
 | |
| 195 |   //println(s"The size of seen is ${seen.size}, ${seen.map(string)}")
 | |
| 196 |   rs match {
 | |
| 197 | case Nil => (Nil, F_ERROR) | |
| 198 |   case r::rs1 if seen.contains(simp(r)._1) => {
 | |
| 199 |     //println(s" I remove ${string(r)}")
 | |
| 200 | val (rs2, f) = flats(rs1, seen) | |
| 201 | (rs2, (v:Val) => IncProj(1, f(v))) | |
| 202 | } | |
| 203 |   case ZERO::rs1 => {
 | |
| 204 | val (rs2, f) = flats(rs1, seen) | |
| 205 | (rs2, (v:Val) => IncProj(1, f(v))) | |
| 206 | } | |
| 207 |   case ALTS(rs0)::rs1 => {
 | |
| 208 | val (rss, f1) = flats(rs0, seen) | |
| 209 | val (rs2, f2) = flats(rs1, rss.toSet ++ seen) | |
| 210 |     (rss:::rs2, (v:Val) => v match {
 | |
| 211 | case Proj(n, vn) => | |
| 212 | if (n < rss.length) Proj(0, f1(Proj(n, vn))) | |
| 213 | else IncProj(1, f2(Proj(n - rss.length, vn))) | |
| 214 | }) | |
| 215 | } | |
| 216 |   case r1::rs2 => {
 | |
| 217 | val (r1s, f1) = simp(r1) | |
| 218 | val (rs3, f2) = flats(rs2, seen + r1s) | |
| 219 |     (r1s::rs3, (v:Val) => v match {
 | |
| 220 | case Proj(0, vn) => Proj(0, f1(vn)) | |
| 221 | case Proj(n, vn) => IncProj(1, f2(Proj(n - 1, vn))) | |
| 222 | }) | |
| 223 | } | |
| 224 | }} | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 225 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 226 | // simplification of regular expressions returning also an | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 227 | // rectification function; no simplification under STAR | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 228 | def simp(r: Rexp): (Rexp, Val => Val) = r match {
 | 
| 549 | 229 |   case ALTS(rs) => {
 | 
| 230 | val (rs_s, fs_s) = flats(rs, Set()) | |
| 231 |     rs_s match {
 | |
| 232 | case Nil => (ZERO, F_ERROR) | |
| 233 | case r::Nil => (r, (v:Val) => fs_s(Proj(0, v))) | |
| 234 | case rs_sd => (ALTS(rs_sd), fs_s) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 235 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 236 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 237 |   case SEQ(r1, r2) => {
 | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 238 | val (r1s, f1s) = simp(r1) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 239 | val (r2s, f2s) = simp(r2) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 240 |     (r1s, r2s) match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 241 | case (ZERO, _) => (ZERO, F_ERROR) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 242 | case (_, ZERO) => (ZERO, F_ERROR) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 243 | case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 244 | case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) | 
| 549 | 245 | case (ALTS(rs), r2s) => (ALTS(rs.map(SEQ(_, r2s))), | 
| 246 | 			       (v:Val) => v match {
 | |
| 247 | case Proj(n, Sequ(v1, v2)) => Sequ(f1s(Proj(n, v1)), f2s(v2)) | |
| 248 | } | |
| 249 | ) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 250 | case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 251 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 252 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 253 | case r => (r, F_ID) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 254 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 255 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 256 | def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
 | 
| 549 | 257 |   case Nil => {
 | 
| 258 |     //println(s"Size of the last regex: ${size(r)}")
 | |
| 259 |     //println(s"${string(r)}")
 | |
| 260 |     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
 | |
| 261 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 262 |   case c::cs => {
 | 
| 549 | 263 | val rd = der(c, r) | 
| 264 | val (r_simp, f_simp) = simp(rd) | |
| 265 |     //println(s"BEFORE ${string(rd)}")
 | |
| 266 |     //println(s"AFTER  ${string(r_simp)}")
 | |
| 267 | val rec = lex_simp(r_simp, cs) | |
| 268 | inj(r, c, f_simp(rec)) | |
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 269 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 270 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 271 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 272 | def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 273 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 274 | |
| 549 | 275 | lexing_simp("ab" | "aa", "ab")
 | 
| 276 | lexing_simp("ab" | "aa", "aa")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 277 | |
| 549 | 278 | lexing(STAR("a" | "aa"), "aaaaa")
 | 
| 279 | lexing_simp(STAR("a" | "aa"), "aaaaa")
 | |
| 450 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 280 | |
| 549 | 281 | lexing(STAR("a" | "aa"), "aaaaaaaaaaa")
 | 
| 282 | lexing_simp(STAR("a" | "aa"), "aaaaaaaaaaa")
 | |
| 283 | ||
| 284 | lexing_simp(STAR("a" | "aa"), "a" * 2)
 | |
| 285 | lexing_simp(STAR("a" | "aa"), "a" * 3)
 | |
| 286 | lexing_simp(STAR("a" | "aa"), "a" * 4)
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 287 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 288 | |
| 549 | 289 | lexing_simp(STAR("a" | "aa"), "a" * 20)
 | 
| 290 | lexing_simp(STAR("a" | "aa"), "a" * 2000)
 | |
| 291 | ||
| 292 | lexing(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
 | |
| 293 | lexing_simp(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
 | |
| 294 | ||
| 295 | lexing(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
 | |
| 296 | lexing_simp(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
 | |
| 297 | ||
| 298 | lexing(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") | |
| 299 | lexing_simp(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") | |
| 300 | ||
| 301 | lexing_simp(ONE | ZERO, "") | |
| 302 | lexing_simp(ZERO | ONE, "") | |
| 303 | ||
| 304 | lexing("a" | ZERO, "a")
 | |
| 305 | lexing_simp("a" | ZERO, "a")
 | |
| 306 | lexing(ZERO | "a", "a") | |
| 307 | lexing_simp(ZERO | "a", "a") | |
| 308 | ||
| 309 | lexing(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") | |
| 310 | lexing_simp(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") | |
| 311 | lexing(ALTS(List("a")), "a")
 | |
| 312 | lexing_simp(ALTS(List("a")), "a")
 | |
| 313 | ||
| 314 | lexing_simp(("a" | ZERO) | ZERO, "a")
 | |
| 315 | lexing_simp("a" | (ZERO | ZERO), "a")
 | |
| 316 | lexing_simp(ZERO | ("a" | ZERO), "a")
 | |
| 317 | ||
| 318 | ||
| 319 | lexing_simp("abc", "abc")
 | |
| 320 | ||
| 321 | lexing_simp("abc" | ONE, "abc")
 | |
| 322 | ||
| 323 | lexing(("a" | "ab") ~ ("b" | ""), "ab")
 | |
| 324 | lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
 | |
| 325 | lexing_simp(("ba" | "c" | "ab"), "ab")
 | |
| 326 | ||
| 327 | lexing(ALTS(List(ALTS(Nil), "c", "ab")), "ab") | |
| 328 | lexing_simp(ALTS(List(ALTS(Nil), "c", "ab")), "ab") | |
| 329 | ||
| 330 | lexing(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
 | |
| 331 | lexing_simp(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
 | |
| 332 | ||
| 333 | lexing(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
 | |
| 334 | lexing_simp(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
 | |
| 335 | ||
| 336 | lexing(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
 | |
| 337 | lexing_simp(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
 | |
| 338 | ||
| 339 | lexing(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
 | |
| 340 | lexing_simp(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
 | |
| 341 | ||
| 342 | lexing(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
 | |
| 343 | lexing_simp(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
 | |
| 344 | ||
| 345 | lexing(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
 | |
| 346 | lexing_simp(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
 | |
| 347 | ||
| 348 | ||
| 349 | lexing_simp(ALTS(List("ba", "c", "ab")), "ab")
 | |
| 350 | ||
| 351 | lexing(ALTS(List("a", "ab", "a")), "ab")
 | |
| 352 | lexing_simp(ALTS(List("a", "ab", "a")), "ab")
 | |
| 353 | ||
| 354 | lexing(STAR("a" | "aa"), "aa")
 | |
| 355 | lexing_simp(STAR("a" | "aa"), "aa")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 356 | |
| 542 | 357 | |
| 358 | ||
| 359 | ||
| 549 | 360 | def enum(n: Int, s: String) : Set[Rexp] = n match {
 | 
| 361 | case 0 => Set(ZERO, ONE) ++ s.toSet.map(CHAR) | |
| 362 |   case n => {
 | |
| 363 | val rs = enum(n - 1, s) | |
| 364 | rs ++ | |
| 365 | (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ | |
| 366 | (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ | |
| 367 | (for (r1 <- rs) yield STAR(r1)) | |
| 368 | } | |
| 542 | 369 | } | 
| 370 | ||
| 549 | 371 | def strs(n: Int, cs: String) : Set[String] = {
 | 
| 372 |   if (n == 0) Set("")
 | |
| 373 |   else {
 | |
| 374 | val ss = strs(n - 1, cs) | |
| 375 | ss ++ | |
| 376 | (for (s <- ss; c <- cs.toList) yield c + s) | |
| 377 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 378 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 379 | |
| 549 | 380 | import scala.util.Try | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 381 | |
| 549 | 382 | def tests(n: Int, m: Int, s: String) = {
 | 
| 383 | val rs = enum(n, s) | |
| 384 | val ss = strs(m, s) | |
| 385 |   println(s"cases generated: ${rs.size} regexes and ${ss.size} strings")
 | |
| 386 |   for (r1 <- rs.par; s1 <- ss.par) yield {
 | |
| 387 | val res1 = Try(Some(lexing(r1, s1))).getOrElse(None) | |
| 388 | val res2 = Try(Some(lexing_simp(r1, s1))).getOrElse(None) | |
| 389 |     if (res1 != res2) println(s"Disagree on ${r1} and ${s1}")
 | |
| 390 | if (res1 != res2) Some((r1, s1)) else None | |
| 391 | } | |
| 392 | } | |
| 393 | ||
| 394 | println("Testing")
 | |
| 395 | println(tests(2,7,"abc")) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 396 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 397 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 398 |