| author | Christian Urban <urbanc@in.tum.de> | 
| Tue, 16 Oct 2018 14:40:30 +0100 | |
| changeset 581 | 4782a4bfc888 | 
| parent 580 | 3b81c582c1f0 | 
| child 617 | c41b68818eae | 
| permissions | -rw-r--r-- | 
| 581 | 1 | // Simple Tokenizer according to Sulzmann & Lu | 
| 2 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 3 | import scala.language.implicitConversions | 
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | import scala.language.reflectiveCalls | 
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | |
// Regular expressions.
//   ZERO         - matches no string at all
//   ONE          - matches only the empty string
//   CHAR(c)      - matches exactly the character c
//   ALT(r1, r2)  - alternative: r1 or r2
//   SEQ(r1, r2)  - sequence: r1 followed by r2
//   STAR(r)      - zero or more repetitions of r
//   RECD(x, r)   - r tagged with the label x (the label shows up in
//                  the environment extracted from a lexing result)
abstract class Rexp
case object ZERO extends Rexp
case object ONE extends Rexp
case class CHAR(c: Char) extends Rexp
case class ALT(r1: Rexp, r2: Rexp) extends Rexp
case class SEQ(r1: Rexp, r2: Rexp) extends Rexp
case class STAR(r: Rexp) extends Rexp
case class RECD(x: String, r: Rexp) extends Rexp
| 581 | 14 | |
// Values: they describe HOW a regular expression matched a string.
//   Empty         - value for ONE
//   Chr(c)        - value for a matched character
//   Sequ(v1, v2)  - value for a sequence
//   Left(v)       - the left branch of an alternative was taken
//   Right(v)      - the right branch of an alternative was taken
//   Stars(vs)     - one value per iteration of a star
//   Rec(x, v)     - value for a labelled (RECD) regular expression
abstract class Val
case object Empty extends Val
case class Chr(c: Char) extends Val
case class Sequ(v1: Val, v2: Val) extends Val
case class Left(v: Val) extends Val
case class Right(v: Val) extends Val
case class Stars(vs: List[Val]) extends Val
case class Rec(x: String, v: Val) extends Val
| 23 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 24 | // some convenience for typing in regular expressions | 
// Turns a list of characters into the regular expression matching
// exactly that character sequence: the empty list becomes ONE, a single
// character becomes CHAR, longer lists become right-nested SEQs.
def charlist2rexp(s : List[Char]): Rexp = s match {
  case Nil => ONE
  case List(single) => CHAR(single)
  case head :: rest => SEQ(CHAR(head), charlist2rexp(rest))
}
// Implicit conversion so that a plain string can be written wherever a
// regular expression is expected.
implicit def string2rexp(s : String) : Rexp = {
  val chars = s.toList
  charlist2rexp(chars)
}
| 164 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | |
// Infix syntax for building regular expressions from regular expressions:
//   r | s   alternative
//   r.%     star
//   r ~ s   sequence
//
// Declared as an implicit class rather than an implicit def returning an
// anonymous `new { ... }`: the anonymous object has a structural type, so
// each operator call would be dispatched via reflection. The result types
// are the ones the compiler previously inferred.
implicit class RexpOps(r: Rexp) {
  def | (s: Rexp): ALT = ALT(r, s)
  def % : STAR = STAR(r)
  def ~ (s: Rexp): SEQ = SEQ(r, s)
}
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | |
// Infix syntax for building regular expressions starting from a string:
//   s | r   alternative        (r a regular expression or a string)
//   s.%     star
//   s ~ r   sequence           (r a regular expression or a string)
//   s $ r   r labelled with the name s
// The string arguments are turned into regular expressions by the
// implicit string2rexp conversion.
//
// Declared as an implicit class rather than an implicit def returning an
// anonymous `new { ... }`: the anonymous object has a structural type, so
// each operator call would be dispatched via reflection. The result types
// are the ones the compiler previously inferred.
implicit class stringOps(s: String) {
  def | (r: Rexp): ALT = ALT(s, r)
  def | (r: String): ALT = ALT(s, r)
  def % : STAR = STAR(s)
  def ~ (r: Rexp): SEQ = SEQ(s, r)
  def ~ (r: String): SEQ = SEQ(s, r)
  def $ (r: Rexp): RECD = RECD(s, r)
}
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
// A test for the more convenient syntax
val re : Rexp =
  ("ab" | "a") ~ ("b" | ONE)
| 50 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 51 | // nullable function: tests whether the regular | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 52 | // expression can recognise the empty string | 
// Returns true exactly when r can match the empty string.
def nullable (r: Rexp) : Boolean = r match {
  case ONE | STAR(_) => true
  case ZERO | CHAR(_) => false
  case ALT(left, right) => nullable(left) || nullable(right)
  case SEQ(first, second) => nullable(first) && nullable(second)
  case RECD(_, inner) => nullable(inner)
}
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 63 | // derivative of a regular expression w.r.t. a character | 
// Derivative of r with respect to the character c. Labels (RECD) are
// dropped: the derivative of a record is the derivative of its body.
def der (c: Char, r: Rexp) : Rexp = r match {
  case ZERO | ONE => ZERO
  case CHAR(d) => if (d == c) ONE else ZERO
  case ALT(left, right) => ALT(der(c, left), der(c, right))
  case SEQ(first, second) =>
    val viaFirst = SEQ(der(c, first), second)
    // if the first part can be empty, c may also be consumed by the second part
    if (nullable(first)) ALT(viaFirst, der(c, second)) else viaFirst
  case STAR(body) => SEQ(der(c, body), STAR(body))
  case RECD(_, inner) => der(c, inner)
}
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 75 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 76 | // derivative w.r.t. a string (iterates der) | 
// Derivative with respect to a whole string: applies der for every
// character of s, from left to right.
def ders (s: List[Char], r: Rexp) : Rexp =
  s.foldLeft(r) { (acc, ch) => der(ch, acc) }
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 81 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 82 | // extracts a string from value | 
// Returns the string underlying a value, i.e. the concatenation of all
// characters it contains.
def flatten(v: Val) : String = v match {
  case Empty => ""
  case Chr(ch) => ch.toString
  case Sequ(first, second) => flatten(first) + flatten(second)
  case Left(inner) => flatten(inner)
  case Right(inner) => flatten(inner)
  case Rec(_, inner) => flatten(inner)
  case Stars(items) => items.map(flatten).mkString
}
| 92 | ||
| 93 | // extracts an environment from a value | |
| 581 | 94 | // used for tokenising a string | 
// Collects, in left-to-right order, a (label, matched string) pair for
// every Rec node in the value. A Rec contributes its own pair first,
// followed by the pairs found inside it.
def env(v: Val) : List[(String, String)] = v match {
  case Empty | Chr(_) => Nil
  case Left(inner) => env(inner)
  case Right(inner) => env(inner)
  case Sequ(first, second) => env(first) ++ env(second)
  case Stars(items) => for (item <- items; entry <- env(item)) yield entry
  case Rec(label, inner) => (label, flatten(inner)) :: env(inner)
}
| 104 | ||
| 105 | // injection part | |
// Builds the value for how r matches the empty string; for alternatives
// the left branch is preferred whenever it is nullable.
// There are deliberately no cases for ZERO and CHAR, so the function is
// only meant to be called on nullable regular expressions (a MatchError
// is raised otherwise).
def mkeps(r: Rexp) : Val = r match {
  case ONE => Empty
  case STAR(_) => Stars(Nil)
  case RECD(label, inner) => Rec(label, mkeps(inner))
  case SEQ(first, second) => Sequ(mkeps(first), mkeps(second))
  case ALT(left, _) if nullable(left) => Left(mkeps(left))
  case ALT(_, right) => Right(mkeps(right))
}
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 114 | |
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 115 | |
// Injection: given a value v for the derivative of r w.r.t. c, puts the
// character c back and returns a value for r itself.
// The cases are pairwise disjoint; combinations that cannot arise from
// der are not handled and raise a MatchError.
def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match {
  case (CHAR(_), Empty) => Chr(c)
  case (ALT(left, _), Left(lv)) => Left(inj(left, c, lv))
  case (ALT(_, right), Right(rv)) => Right(inj(right, c, rv))
  case (SEQ(first, _), Sequ(v1, v2)) => Sequ(inj(first, c, v1), v2)
  case (SEQ(first, _), Left(Sequ(v1, v2))) => Sequ(inj(first, c, v1), v2)
  case (SEQ(first, second), Right(v2)) => Sequ(mkeps(first), inj(second, c, v2))
  case (STAR(body), Sequ(v1, Stars(vs))) => Stars(inj(body, c, v1) :: vs)
  case (RECD(label, body), _) => Rec(label, inj(body, c, v))
}
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 126 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 127 | // main lexing function (produces a value) | 
// Lexes the character list s with r and returns the resulting value:
// derivatives are taken character by character, and on the way back
// each character is injected into the value.
// Throws an Exception("Not matched") if r does not match s.
def lex(r: Rexp, s: List[Char]) : Val = s match {
  case head :: rest => inj(r, head, lex(der(head, r), rest))
  case Nil =>
    if (!nullable(r)) throw new Exception("Not matched")
    mkeps(r)
}
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 133 | |
// String front-end for lex.
def lexing(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex(r, chars)
}

// a small test: extract the environment for two labelled parts
val re1 : Rexp =
  ("first" $ ("a" | "ab")) ~ ("second" $ ("b" | ONE))
env(lexing(re1, "ab"))
| 450 
b93eaa833d31
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
426diff
changeset | 139 | |
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 140 | // some "rectification" functions for simplification | 
// Rectification functions: they turn a value for a simplified regular
// expression back into a value for the un-simplified one.

// nothing was simplified
def F_ID(v: Val): Val = v

// only the right / left branch of an alternative survived
def F_RIGHT(f: Val => Val) = { (inner: Val) => Right(f(inner)) }
def F_LEFT(f: Val => Val) = { (inner: Val) => Left(f(inner)) }

// both branches of an alternative survived
def F_ALT(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Right(rv) => Right(f2(rv))
  case Left(lv) => Left(f1(lv))
}

// both parts of a sequence survived
def F_SEQ(f1: Val => Val, f2: Val => Val) = (v:Val) => v match {
  case Sequ(first, second) => Sequ(f1(first), f2(second))
}

// the first part of a sequence was simplified away to ONE
def F_SEQ_Empty1(f1: Val => Val, f2: Val => Val) = { (second: Val) =>
  Sequ(f1(Empty), f2(second))
}

// the second part of a sequence was simplified away to ONE
def F_SEQ_Empty2(f1: Val => Val, f2: Val => Val) = { (first: Val) =>
  Sequ(f1(first), f2(Empty))
}

// the body of a record was simplified
def F_RECD(f: Val => Val) = (v:Val) => v match {
  case Rec(label, inner) => Rec(label, f(inner))
}

// used where the simplified regular expression is ZERO
def F_ERROR(v: Val): Val = throw new Exception("error")
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 159 | |
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 160 | // simplification of regular expressions returning also an | 
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 161 | // rectification function; no simplification under STAR | 
// Simplifies r and returns, together with the simplified regular
// expression, the rectification function that maps a value for the
// simplified expression back to a value for r.
// Only ALT, SEQ and RECD are simplified; everything else (in particular
// anything under a STAR) is returned unchanged with F_ID.
def simp(r: Rexp): (Rexp, Val => Val) = r match {
  case ALT(left, right) =>
    val (leftS, fixLeft) = simp(left)
    val (rightS, fixRight) = simp(right)
    (leftS, rightS) match {
      case (ZERO, _) => (rightS, F_RIGHT(fixRight))
      case (_, ZERO) => (leftS, F_LEFT(fixLeft))
      // identical branches: keep only the left one
      case _ if leftS == rightS => (leftS, F_LEFT(fixLeft))
      case _ => (ALT(leftS, rightS), F_ALT(fixLeft, fixRight))
    }
  case SEQ(first, second) =>
    val (firstS, fixFirst) = simp(first)
    val (secondS, fixSecond) = simp(second)
    (firstS, secondS) match {
      case (ZERO, _) => (ZERO, F_ERROR)
      case (_, ZERO) => (ZERO, F_ERROR)
      case (ONE, _) => (secondS, F_SEQ_Empty1(fixFirst, fixSecond))
      case (_, ONE) => (firstS, F_SEQ_Empty2(fixFirst, fixSecond))
      case _ => (SEQ(firstS, secondS), F_SEQ(fixFirst, fixSecond))
    }
  case RECD(label, inner) =>
    val (innerS, fixInner) = simp(inner)
    (RECD(label, innerS), F_RECD(fixInner))
  case other => (other, F_ID)
}
| 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 190 | |
// Like lex, but every derivative is simplified before lexing continues;
// the value coming back is rectified before the character is injected.
// Throws an Exception("Not matched") if r does not match s.
def lex_simp(r: Rexp, s: List[Char]) : Val = s match {
  case head :: rest =>
    val (simplified, rectify) = simp(der(head, r))
    val inner = rectify(lex_simp(simplified, rest))
    inj(r, head, inner)
  case Nil =>
    if (!nullable(r)) throw new Exception("Not matched")
    mkeps(r)
}
| 
6c1d214c39ef
added progs
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 198 | |
// String front-end for lex_simp.
def lexing_simp(r: Rexp, s: String) : Val = {
  val chars = s.toList
  lex_simp(r, chars)
}

// small smoke test of the simplifying lexer
lexing_simp(("a" | "ab") ~ ("b" | ""), "ab")
| 549 | 202 | |
| 579 | 203 | // Lexing Rules for a Small While Language | 
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 204 | |
// one or more repetitions of r: r followed by r*
def PLUS(r: Rexp) = SEQ(r, STAR(r))
| 549 | 206 | |
// lower-case letters
val SYM =
  "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" |
  "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
// decimal digits
val DIGIT =
  "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
// identifiers: a letter followed by letters or digits
val ID = SYM ~ (SYM | DIGIT).%
// numbers: one or more digits
val NUM = PLUS(DIGIT)
val KEYWORD : Rexp =
  "skip" | "while" | "do" | "if" | "then" | "else" |
  "read" | "write" | "true" | "false"
val SEMI: Rexp = ";"
val OP: Rexp =
  ":=" | "==" | "-" | "+" | "*" | "!=" | "<" | ">" | "<=" | ">=" | "%" | "/"
// one or more blanks, newlines or tabs
val WHITESPACE = PLUS(" " | "\n" | "\t")
val RPAREN: Rexp = ")"
val LPAREN: Rexp = "("
val BEGIN: Rexp = "{"
val END: Rexp = "}"
// string literals: lower-case letters between double quotes
val STRING: Rexp = "\"" ~ SYM.% ~ "\""
| 549 | 220 | |
| 221 | ||
// The lexer for the While language: any number of tokens, where each
// kind of token is recorded under its own label
// (k, i, o, n, s, str, p, b, w).
val WHILE_REGS = (
  ("k" $ KEYWORD) |
  ("i" $ ID) |
  ("o" $ OP) |
  ("n" $ NUM) |
  ("s" $ SEMI) |
  ("str" $ STRING) |
  ("p" $ (LPAREN | RPAREN)) |
  ("b" $ (BEGIN | END)) |
  ("w" $ WHITESPACE)
).%
| 549 | 231 | |
| 579 | 232 | // Testing | 
| 233 | //============ | |
| 549 | 234 | |
// Evaluates code once, prints the elapsed wall-clock time in seconds
// and returns the result of the evaluation.
def time[T](code: => T) = {
  val t0 = System.nanoTime()
  val outcome = code
  val elapsedNanos = System.nanoTime() - t0
  println(elapsedNanos/1.0e9)
  outcome
}
| 549 | 242 | |
// two small lexing tests (without simplification)
val r1 = ("a" | "ab") ~ ("bcd" | "c")
println(lexing(r1, "abcd"))

val r2 = ("" | "a") ~ ("ab" | "b")
println(lexing(r2, "ab"))
| 542 | 248 | |
| 249 | ||
// Two Simple While Tests
//========================
println("prog0 test")

val prog0 = """read if"""
val tokens0 = env(lexing_simp(WHILE_REGS, prog0))
println(tokens0)

println("prog1 test")

val prog1 = """read n; write (n)"""
val tokens1 = env(lexing_simp(WHILE_REGS, prog1))
println(tokens1)
| 261 | ||
| 262 | ||
// Bigger Test
//=============

// NOTE(review): the program text is reproduced as it is visible in this
// listing; confirm the whitespace inside the literal against the original.
val prog2 = """
write "fib";
read n;
minus1 := 0;
minus2 := 1;
while n > 0 do {
temp := minus2;
minus2 := minus1 + minus2;
minus1 := temp;
n := n - 1
};
write "result";
write minus2
"""

println("Tokens")
println(env(lexing_simp(WHILE_REGS, prog2)))
// the same tokens again, one per line and without the whitespace tokens
println(
  env(lexing_simp(WHILE_REGS, prog2))
    .filterNot { case (label, _) => label == "w" }
    .mkString("\n")
)
| 284 | ||
// timing tests: lex 1, 11 and 21 copies of the program
(1 to 21 by 10).foreach { copies =>
  print(copies.toString + ": ")
  time(lexing_simp(WHILE_REGS, prog2 * copies))
}
| 292 | ||
| 352 
1e1b0fe66107
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
165diff
changeset | 293 |