| author | Christian Urban <urbanc@in.tum.de> | 
| Sun, 23 Sep 2018 09:04:24 +0100 | |
| changeset 557 | 6d0e8b6f4243 | 
| parent 552 | 8a79cc0b277c | 
| child 579 | eb9ef7b96f4a | 
| permissions | -rw-r--r-- | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 1 | import scala.language.implicitConversions | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | import scala.language.reflectiveCalls | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 4 | abstract class Rexp | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 5 | case object ZERO extends Rexp | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 6 | case object ONE extends Rexp | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | case class CHAR(c: Char) extends Rexp | 
| 549 | 8 | case class ALTS(rs: List[Rexp]) extends Rexp | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 9 | case class SEQ(r1: Rexp, r2: Rexp) extends Rexp | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 10 | case class STAR(r: Rexp) extends Rexp | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | |
| 549 | 12 | // ALT is now an abbreviation | 
| 13 | def ALT(r1: Rexp, r2: Rexp) = ALTS(List(r1, r2)) | |
| 14 | ||
| 15 | // proj is now used instead for Left and Right | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 16 | abstract class Val | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 17 | case object Empty extends Val | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 18 | case class Chr(c: Char) extends Val | 
| 520 | 19 | case class Sequ(v1: Val, v2: Val) extends Val | 
| 549 | 20 | case class Proj(n: Int, v: Val) extends Val | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 21 | case class Stars(vs: List[Val]) extends Val | 
| 549 | 22 | case class Rec(s: String, v: Val) extends Val | 
| 23 | ||
| 24 | // for manipulating projections | |
| 25 | def IncProj(m: Int, v: Val) = v match {
 | |
| 26 | case Proj(n, v) => Proj(n + m, v) | |
| 27 | } | |
| 28 | ||
| 29 | def DecProj(m: Int, v: Val) = v match {
 | |
| 30 | case Proj(n, v) => Proj(n - m, v) | |
| 31 | } | |
| 32 | ||
| 33 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 34 | // some convenience for typing in regular expressions | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 35 | def charlist2rexp(s : List[Char]): Rexp = s match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 36 | case Nil => ONE | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | case c::Nil => CHAR(c) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | case c::s => SEQ(CHAR(c), charlist2rexp(s)) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 | implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | implicit def RexpOps(r: Rexp) = new {
 | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | def | (s: Rexp) = ALT(r, s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | def % = STAR(r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | def ~ (s: Rexp) = SEQ(r, s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | implicit def stringOps(s: String) = new {
 | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | def | (r: Rexp) = ALT(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | def | (r: String) = ALT(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | def % = STAR(s) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | def ~ (r: Rexp) = SEQ(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | def ~ (r: String) = SEQ(s, r) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | |
| 549 | 56 | // string of a regular expression - for testing purposes | 
| 57 | def string(r: Rexp) : String = r match {
 | |
| 58 | case ZERO => "0" | |
| 59 | case ONE => "1" | |
| 60 | case CHAR(c) => c.toString | |
| 61 |   case ALTS(rs) => rs.map(string).mkString("[", "|", "]")
 | |
| 62 |   case SEQ(CHAR(c), CHAR(d)) => s"${c}${d}"
 | |
| 63 |   case SEQ(ONE, CHAR(c)) => s"1${c}"
 | |
| 64 |   case SEQ(r1, r2) => s"(${string(r1)} ~ ${string(r2)})"
 | |
| 65 |   case STAR(r) => s"(${string(r)})*"
 | |
| 66 | } | |
| 67 | ||
| 68 | // size of a regular expression - for testing purposes | |
| 69 | def size(r: Rexp) : Int = r match {
 | |
| 70 | case ZERO => 1 | |
| 71 | case ONE => 1 | |
| 72 | case CHAR(_) => 1 | |
| 73 | case ALTS(rs) => 1 + rs.map(size).sum | |
| 74 | case SEQ(r1, r2) => 1 + size(r1) + size(r2) | |
| 75 | case STAR(r) => 1 + size(r) | |
| 76 | } | |
| 77 | ||
| 78 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 79 | // nullable function: tests whether the regular | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 80 | // expression can recognise the empty string | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 | def nullable (r: Rexp) : Boolean = r match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 82 | case ZERO => false | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 83 | case ONE => true | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | case CHAR(_) => false | 
| 549 | 85 | case ALTS(rs) => rs.exists(nullable) | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 | case SEQ(r1, r2) => nullable(r1) && nullable(r2) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | case STAR(_) => true | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 90 | // derivative of a regular expression w.r.t. a character | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | def der (c: Char, r: Rexp) : Rexp = r match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 92 | case ZERO => ZERO | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 93 | case ONE => ZERO | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 94 | case CHAR(d) => if (c == d) ONE else ZERO | 
| 549 | 95 | case ALTS(rs) => ALTS(rs.map(der(c, _))) | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | case SEQ(r1, r2) => | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 | else SEQ(der(c, r1), r2) | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | case STAR(r) => SEQ(der(c, r), STAR(r)) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 100 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 101 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 102 | // derivative w.r.t. a string (iterates der) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 103 | def ders (s: List[Char], r: Rexp) : Rexp = s match {
 | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 104 | case Nil => r | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 105 | case c::s => ders(s, der(c, r)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 106 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 107 | |
| 550 | 108 | val test : Rexp= STAR("a" | "aa")
 | 
| 109 | size(test) | |
| 110 | size(der('a', test))
 | |
| 111 | size(der('a', der('a', test)))
 | |
| 112 | ||
| 113 | size(ders("aaaaaa".toList, test)) 
 | |
| 114 | string(ders("aaaaaa".toList, test))
 | |
| 115 | ||
| 116 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 117 | // extracts a string from value | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 118 | def flatten(v: Val) : String = v match {
 | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 119 | case Empty => "" | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 120 | case Chr(c) => c.toString | 
| 549 | 121 | case Proj(_, v) => flatten(v) | 
| 542 | 122 | case Sequ(v1, v2) => flatten(v1) ++ flatten(v2) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 123 | case Stars(vs) => vs.map(flatten).mkString | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 124 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 125 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 126 | |
| 549 | 127 | // mkeps | 
| 128 | def mkeps(r: Rexp) : Val = r match {
 | |
| 129 | case ONE => Empty | |
| 130 | case ALTS(r1::rs) => | |
| 131 | if (nullable(r1)) Proj(0, mkeps(r1)) | |
| 132 | else IncProj(1, mkeps(ALTS(rs))) | |
| 133 | case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) | |
| 134 | case STAR(r) => Stars(Nil) | |
| 135 | } | |
| 136 | ||
| 137 | // injection | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 138 | def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
 | 
| 520 | 139 | case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) | 
| 140 | case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) | |
| 549 | 141 | case (SEQ(r1, r2), Proj(0, Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) | 
| 142 | case (SEQ(r1, r2), Proj(1, v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) | |
| 143 | case (ALTS(rs), Proj(n, v1)) => Proj(n, inj(rs(n), c, v1)) | |
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 144 | case (CHAR(d), Empty) => Chr(c) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 145 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 146 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 147 | // main lexing function (produces a value) | 
| 549 | 148 | // - does not simplify | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 149 | def lex(r: Rexp, s: List[Char]) : Val = s match {
 | 
| 549 | 150 |   case Nil => {
 | 
| 151 |     //println(s"Size of the last regex: ${size(r)}")
 | |
| 152 |     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
 | |
| 153 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 154 | case c::cs => inj(r, c, lex(der(c, r), cs)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 155 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 156 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 157 | def lexing(r: Rexp, s: String) : Val = lex(r, s.toList) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 158 | |
| 549 | 159 | lexing("a" | ZERO, "a")
 | 
| 160 | lexing(ZERO | "a", "a") | |
| 450 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 161 | |
| 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 162 | lexing(("ab" | "a") ~ ("b" | ONE), "ab")
 | 
| 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 163 | |
| 549 | 164 | |
| 165 | // removing duplicate regular expressions | |
| 166 | val unit = (ZERO, F_ERROR(_)) | |
| 167 | ||
| 168 | def dups2(xs: List[(Rexp, Val => Val)], | |
| 169 | 	 acc: List[(Rexp, Val => Val)] = Nil) : List[(Rexp, Val => Val)] = xs match {
 | |
| 170 | case Nil => acc | |
| 171 | case (x, y)::xs => | |
| 172 | if (acc.map(_._1).contains(x)) dups2(xs, acc :+ unit) | |
| 173 | else dups2(xs, acc :+ (x, y)) | |
| 174 | } | |
| 175 | ||
| 176 | def dups(xs: List[(Rexp, Val => Val)]) : List[(Rexp, Val => Val)] = {
 | |
| 177 | val out = dups2(xs) | |
| 178 |   //if (out != xs) {
 | |
| 179 | // println() | |
| 180 |   //  println(s"Input ${string(ALTS(xs.map(_._1)))}")
 | |
| 181 |   //  println(s"Ouput ${string(ALTS(out.map(_._1)))}")
 | |
| 182 | //} | |
| 183 | out | |
| 184 | } | |
| 185 | ||
| 186 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 187 | // some "rectification" functions for simplification | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 188 | def F_ID(v: Val): Val = v | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 189 | def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
 | 
| 520 | 190 | case Sequ(v1, v2) => Sequ(f1(v1), f2(v2)) | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 191 | } | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 192 | def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = | 
| 520 | 193 | (v:Val) => Sequ(f1(Empty), f2(v)) | 
| 354 
86b2aeae3e98
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
352diff
changeset | 194 | def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = | 
| 520 | 195 | (v:Val) => Sequ(f1(v), f2(Empty)) | 
| 549 | 196 | def F_ERROR(v: Val): Val = throw new Exception("error")
 | 
| 197 | def F_PRINT(v: Val): Val = {
 | |
| 198 |   println(s"value is ${v}")
 | |
| 199 |   throw new Exception("caught error")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 200 | } | 
| 549 | 201 | |
| 202 | def flats(rs: List[Rexp], seen: Set[Rexp]) : (List[Rexp], Val => Val) = {
 | |
| 203 |   //println(s"I am flats: ${string(ALTS(rs))}")
 | |
| 204 |   //println(s"The size of seen is ${seen.size}, ${seen.map(string)}")
 | |
| 205 |   rs match {
 | |
| 206 | case Nil => (Nil, F_ERROR) | |
| 207 |   case r::rs1 if seen.contains(simp(r)._1) => {
 | |
| 208 |     //println(s" I remove ${string(r)}")
 | |
| 209 | val (rs2, f) = flats(rs1, seen) | |
| 210 | (rs2, (v:Val) => IncProj(1, f(v))) | |
| 211 | } | |
| 212 |   case ZERO::rs1 => {
 | |
| 213 | val (rs2, f) = flats(rs1, seen) | |
| 214 | (rs2, (v:Val) => IncProj(1, f(v))) | |
| 215 | } | |
| 216 |   case ALTS(rs0)::rs1 => {
 | |
| 217 | val (rss, f1) = flats(rs0, seen) | |
| 218 | val (rs2, f2) = flats(rs1, rss.toSet ++ seen) | |
| 219 |     (rss:::rs2, (v:Val) => v match {
 | |
| 220 | case Proj(n, vn) => | |
| 221 | if (n < rss.length) Proj(0, f1(Proj(n, vn))) | |
| 222 | else IncProj(1, f2(Proj(n - rss.length, vn))) | |
| 223 | }) | |
| 224 | } | |
| 225 |   case r1::rs2 => {
 | |
| 226 | val (r1s, f1) = simp(r1) | |
| 227 | val (rs3, f2) = flats(rs2, seen + r1s) | |
| 228 |     (r1s::rs3, (v:Val) => v match {
 | |
| 229 | case Proj(0, vn) => Proj(0, f1(vn)) | |
| 230 | case Proj(n, vn) => IncProj(1, f2(Proj(n - 1, vn))) | |
| 231 | }) | |
| 232 | } | |
| 233 | }} | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 234 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 235 | // simplification of regular expressions returning also an | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 236 | // rectification function; no simplification under STAR | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 237 | def simp(r: Rexp): (Rexp, Val => Val) = r match {
 | 
| 549 | 238 |   case ALTS(rs) => {
 | 
| 239 | val (rs_s, fs_s) = flats(rs, Set()) | |
| 240 |     rs_s match {
 | |
| 241 | case Nil => (ZERO, F_ERROR) | |
| 242 | case r::Nil => (r, (v:Val) => fs_s(Proj(0, v))) | |
| 243 | case rs_sd => (ALTS(rs_sd), fs_s) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 244 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 245 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 246 |   case SEQ(r1, r2) => {
 | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 247 | val (r1s, f1s) = simp(r1) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 248 | val (r2s, f2s) = simp(r2) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 249 |     (r1s, r2s) match {
 | 
| 426 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 250 | case (ZERO, _) => (ZERO, F_ERROR) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 251 | case (_, ZERO) => (ZERO, F_ERROR) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 252 | case (ONE, _) => (r2s, F_SEQ_Empty1(f1s, f2s)) | 
| 
0debe6f41396
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
354diff
changeset | 253 | case (_, ONE) => (r1s, F_SEQ_Empty2(f1s, f2s)) | 
| 549 | 254 | case (ALTS(rs), r2s) => (ALTS(rs.map(SEQ(_, r2s))), | 
| 255 | 			       (v:Val) => v match {
 | |
| 256 | case Proj(n, Sequ(v1, v2)) => Sequ(f1s(Proj(n, v1)), f2s(v2)) | |
| 257 | } | |
| 258 | ) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 259 | case _ => (SEQ(r1s,r2s), F_SEQ(f1s, f2s)) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 260 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 261 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 262 | case r => (r, F_ID) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 263 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 264 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 265 | def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
 | 
| 549 | 266 |   case Nil => {
 | 
| 267 |     //println(s"Size of the last regex: ${size(r)}")
 | |
| 268 |     //println(s"${string(r)}")
 | |
| 269 |     if (nullable(r)) mkeps(r) else throw new Exception("Not matched")
 | |
| 270 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 271 |   case c::cs => {
 | 
| 549 | 272 | val rd = der(c, r) | 
| 273 | val (r_simp, f_simp) = simp(rd) | |
| 552 | 274 |     println(s"BEFORE ${string(rd)}")
 | 
| 275 |     println(s"AFTER  ${string(r_simp)}")
 | |
| 549 | 276 | val rec = lex_simp(r_simp, cs) | 
| 277 | inj(r, c, f_simp(rec)) | |
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 278 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 279 | } | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 280 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 281 | def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 282 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 283 | |
| 549 | 284 | lexing_simp("ab" | "aa", "ab")
 | 
| 285 | lexing_simp("ab" | "aa", "aa")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 286 | |
| 549 | 287 | lexing(STAR("a" | "aa"), "aaaaa")
 | 
| 288 | lexing_simp(STAR("a" | "aa"), "aaaaa")
 | |
| 450 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 289 | |
| 549 | 290 | lexing(STAR("a" | "aa"), "aaaaaaaaaaa")
 | 
| 291 | lexing_simp(STAR("a" | "aa"), "aaaaaaaaaaa")
 | |
| 292 | ||
| 293 | lexing_simp(STAR("a" | "aa"), "a" * 2)
 | |
| 294 | lexing_simp(STAR("a" | "aa"), "a" * 3)
 | |
| 295 | lexing_simp(STAR("a" | "aa"), "a" * 4)
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 296 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 297 | |
| 549 | 298 | lexing_simp(STAR("a" | "aa"), "a" * 20)
 | 
| 299 | lexing_simp(STAR("a" | "aa"), "a" * 2000)
 | |
| 300 | ||
| 301 | lexing(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
 | |
| 302 | lexing_simp(ALTS(List("aa", "aa", "aa", "ab", "ab")), "ab")
 | |
| 303 | ||
| 304 | lexing(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
 | |
| 305 | lexing_simp(ALTS(List(("aa" | "ab" | "aa"), "aa", "ab", "ab")), "ab")
 | |
| 306 | ||
| 307 | lexing(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") | |
| 308 | lexing_simp(ALTS(List(ZERO, ZERO, ONE, "aa", ZERO, "aa", "aa")), "aa") | |
| 309 | ||
| 310 | lexing_simp(ONE | ZERO, "") | |
| 311 | lexing_simp(ZERO | ONE, "") | |
| 312 | ||
| 313 | lexing("a" | ZERO, "a")
 | |
| 314 | lexing_simp("a" | ZERO, "a")
 | |
| 315 | lexing(ZERO | "a", "a") | |
| 316 | lexing_simp(ZERO | "a", "a") | |
| 317 | ||
| 318 | lexing(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") | |
| 319 | lexing_simp(ALTS(List(ZERO, ZERO, ONE, "a", ZERO, "a")), "a") | |
| 320 | lexing(ALTS(List("a")), "a")
 | |
| 321 | lexing_simp(ALTS(List("a")), "a")
 | |
| 322 | ||
| 323 | lexing_simp(("a" | ZERO) | ZERO, "a")
 | |
| 324 | lexing_simp("a" | (ZERO | ZERO), "a")
 | |
| 325 | lexing_simp(ZERO | ("a" | ZERO), "a")
 | |
| 326 | ||
| 327 | ||
| 328 | lexing_simp("abc", "abc")
 | |
| 329 | ||
| 330 | lexing_simp("abc" | ONE, "abc")
 | |
| 331 | ||
| 332 | lexing(("a" | "ab") ~ ("b" | ""), "ab")
 | |
| 333 | lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
 | |
| 334 | lexing_simp(("ba" | "c" | "ab"), "ab")
 | |
| 335 | ||
| 336 | lexing(ALTS(List(ALTS(Nil), "c", "ab")), "ab") | |
| 337 | lexing_simp(ALTS(List(ALTS(Nil), "c", "ab")), "ab") | |
| 338 | ||
| 339 | lexing(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
 | |
| 340 | lexing_simp(ALTS(List(ALTS("ab"::Nil), "c", "ab")), "ab")
 | |
| 341 | ||
| 342 | lexing(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
 | |
| 343 | lexing_simp(ALTS(List(ALTS(List("a","ab")), "c", "ab")), "ab")
 | |
| 344 | ||
| 345 | lexing(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
 | |
| 346 | lexing_simp(ALTS(List(ALTS(List("ab","a")), "c", "ab")), "ab")
 | |
| 347 | ||
| 348 | lexing(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
 | |
| 349 | lexing_simp(ALTS(List(ALTS(List("ab","a","a")), "c", "ab")), "ab")
 | |
| 350 | ||
| 351 | lexing(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
 | |
| 352 | lexing_simp(ALTS(List(ALTS(List("a","ab","a")), "c", "ab")), "ab")
 | |
| 353 | ||
| 354 | lexing(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
 | |
| 355 | lexing_simp(ALTS(List(ALTS(List("b","a","ab")), "c", "ab")), "ab")
 | |
| 356 | ||
| 357 | ||
| 358 | lexing_simp(ALTS(List("ba", "c", "ab")), "ab")
 | |
| 359 | ||
| 360 | lexing(ALTS(List("a", "ab", "a")), "ab")
 | |
| 361 | lexing_simp(ALTS(List("a", "ab", "a")), "ab")
 | |
| 362 | ||
| 363 | lexing(STAR("a" | "aa"), "aa")
 | |
| 364 | lexing_simp(STAR("a" | "aa"), "aa")
 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 365 | |
| 542 | 366 | |
| 367 | ||
| 368 | ||
| 549 | 369 | def enum(n: Int, s: String) : Set[Rexp] = n match {
 | 
| 370 | case 0 => Set(ZERO, ONE) ++ s.toSet.map(CHAR) | |
| 371 |   case n => {
 | |
| 372 | val rs = enum(n - 1, s) | |
| 373 | rs ++ | |
| 374 | (for (r1 <- rs; r2 <- rs) yield ALT(r1, r2)) ++ | |
| 375 | (for (r1 <- rs; r2 <- rs) yield SEQ(r1, r2)) ++ | |
| 376 | (for (r1 <- rs) yield STAR(r1)) | |
| 377 | } | |
| 542 | 378 | } | 
| 379 | ||
| 549 | 380 | def strs(n: Int, cs: String) : Set[String] = {
 | 
| 381 |   if (n == 0) Set("")
 | |
| 382 |   else {
 | |
| 383 | val ss = strs(n - 1, cs) | |
| 384 | ss ++ | |
| 385 | (for (s <- ss; c <- cs.toList) yield c + s) | |
| 386 | } | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 387 | } | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 388 | |
| 549 | 389 | import scala.util.Try | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 390 | |
| 549 | 391 | def tests(n: Int, m: Int, s: String) = {
 | 
| 392 | val rs = enum(n, s) | |
| 393 | val ss = strs(m, s) | |
| 394 |   println(s"cases generated: ${rs.size} regexes and ${ss.size} strings")
 | |
| 395 |   for (r1 <- rs.par; s1 <- ss.par) yield {
 | |
| 396 | val res1 = Try(Some(lexing(r1, s1))).getOrElse(None) | |
| 397 | val res2 = Try(Some(lexing_simp(r1, s1))).getOrElse(None) | |
| 398 |     if (res1 != res2) println(s"Disagree on ${r1} and ${s1}")
 | |
| 399 | if (res1 != res2) Some((r1, s1)) else None | |
| 400 | } | |
| 401 | } | |
| 402 | ||
| 403 | println("Testing")
 | |
| 404 | println(tests(2,7,"abc")) | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 405 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 406 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 407 |