58 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
58 case SEQ(r1, r2) => nullable(r1) && nullable(r2) |
59 case STAR(_) => true |
59 case STAR(_) => true |
60 case RECD(_, r1) => nullable(r1) |
60 case RECD(_, r1) => nullable(r1) |
61 } |
61 } |
62 |
62 |
63 // derivative of a regular expression w.r.t. a character |
63 // the derivative of a regular expression w.r.t. a character |
64 def der (c: Char, r: Rexp) : Rexp = r match { |
64 def der (c: Char, r: Rexp) : Rexp = r match { |
65 case ZERO => ZERO |
65 case ZERO => ZERO |
66 case ONE => ZERO |
66 case ONE => ZERO |
67 case CHAR(d) => if (c == d) ONE else ZERO |
67 case CHAR(d) => if (c == d) ONE else ZERO |
68 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
68 case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) |
71 else SEQ(der(c, r1), r2) |
71 else SEQ(der(c, r1), r2) |
72 case STAR(r) => SEQ(der(c, r), STAR(r)) |
72 case STAR(r) => SEQ(der(c, r), STAR(r)) |
73 case RECD(_, r1) => der(c, r1) |
73 case RECD(_, r1) => der(c, r1) |
74 } |
74 } |
75 |
75 |
76 // derivative w.r.t. a string (iterates der) |
76 // the derivative w.r.t. a string (iterates der) |
77 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
77 def ders (s: List[Char], r: Rexp) : Rexp = s match { |
78 case Nil => r |
78 case Nil => r |
79 case c::s => ders(s, der(c, r)) |
79 case c::s => ders(s, der(c, r)) |
80 } |
80 } |
81 |
81 |
88 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
88 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
89 case Stars(vs) => vs.map(flatten).mkString |
89 case Stars(vs) => vs.map(flatten).mkString |
90 case Rec(_, v) => flatten(v) |
90 case Rec(_, v) => flatten(v) |
91 } |
91 } |
92 |
92 |
93 // extracts an environment from a value |
93 // extracts an environment from a value; |
94 // used for tokenise a string |
94 // used for tokenise a string |
95 def env(v: Val) : List[(String, String)] = v match { |
95 def env(v: Val) : List[(String, String)] = v match { |
96 case Empty => Nil |
96 case Empty => Nil |
97 case Chr(c) => Nil |
97 case Chr(c) => Nil |
98 case Left(v) => env(v) |
98 case Left(v) => env(v) |
100 case Sequ(v1, v2) => env(v1) ::: env(v2) |
100 case Sequ(v1, v2) => env(v1) ::: env(v2) |
101 case Stars(vs) => vs.flatMap(env) |
101 case Stars(vs) => vs.flatMap(env) |
102 case Rec(x, v) => (x, flatten(v))::env(v) |
102 case Rec(x, v) => (x, flatten(v))::env(v) |
103 } |
103 } |
104 |
104 |
105 // injection part |
105 // The Injection Part of the Tokeniser |
|
106 |
|
107 // calculates a value for how a nullable regex |
|
108 // matches the empty string |
106 def mkeps(r: Rexp) : Val = r match { |
109 def mkeps(r: Rexp) : Val = r match { |
107 case ONE => Empty |
110 case ONE => Empty |
108 case ALT(r1, r2) => |
111 case ALT(r1, r2) => |
109 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
112 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
110 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
113 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
111 case STAR(r) => Stars(Nil) |
114 case STAR(r) => Stars(Nil) |
112 case RECD(x, r) => Rec(x, mkeps(r)) |
115 case RECD(x, r) => Rec(x, mkeps(r)) |
113 } |
116 } |
114 |
117 |
115 |
118 // injects back a character into a value |
116 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
119 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
117 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
120 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
118 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
121 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
119 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
122 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
120 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
123 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
122 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
125 case (ALT(r1, r2), Right(v2)) => Right(inj(r2, c, v2)) |
123 case (CHAR(d), Empty) => Chr(c) |
126 case (CHAR(d), Empty) => Chr(c) |
124 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
127 case (RECD(x, r1), _) => Rec(x, inj(r1, c, v)) |
125 } |
128 } |
126 |
129 |
127 // main lexing function (produces a value) |
130 // the main lexing function (produces a value) |
128 def lex(r: Rexp, s: List[Char]) : Val = s match { |
131 def lex(r: Rexp, s: List[Char]) : Val = s match { |
129 case Nil => if (nullable(r)) mkeps(r) |
132 case Nil => if (nullable(r)) mkeps(r) |
130 else throw new Exception("Not matched") |
133 else throw new Exception("Not matched") |
131 case c::cs => inj(r, c, lex(der(c, r), cs)) |
134 case c::cs => inj(r, c, lex(der(c, r), cs)) |
132 } |
135 } |
155 def F_RECD(f: Val => Val) = (v:Val) => v match { |
158 def F_RECD(f: Val => Val) = (v:Val) => v match { |
156 case Rec(x, v) => Rec(x, f(v)) |
159 case Rec(x, v) => Rec(x, f(v)) |
157 } |
160 } |
158 def F_ERROR(v: Val): Val = throw new Exception("error") |
161 def F_ERROR(v: Val): Val = throw new Exception("error") |
159 |
162 |
160 // simplification of regular expressions returning also an |
163 // simplification of regular expressions returns now also |
161 // rectification function; no simplification under STAR |
164 // an rectification function; no simplification under STAR |
162 def simp(r: Rexp): (Rexp, Val => Val) = r match { |
165 def simp(r: Rexp): (Rexp, Val => Val) = r match { |
163 case ALT(r1, r2) => { |
166 case ALT(r1, r2) => { |
164 val (r1s, f1s) = simp(r1) |
167 val (r1s, f1s) = simp(r1) |
165 val (r2s, f2s) = simp(r2) |
168 val (r2s, f2s) = simp(r2) |
166 (r1s, r2s) match { |
169 (r1s, r2s) match { |
186 (RECD(x, r1s), F_RECD(f1s)) |
189 (RECD(x, r1s), F_RECD(f1s)) |
187 } |
190 } |
188 case r => (r, F_ID) |
191 case r => (r, F_ID) |
189 } |
192 } |
190 |
193 |
|
194 // lexing functions including simplification |
191 def lex_simp(r: Rexp, s: List[Char]) : Val = s match { |
195 def lex_simp(r: Rexp, s: List[Char]) : Val = s match { |
192 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
196 case Nil => if (nullable(r)) mkeps(r) else throw new Exception("Not matched") |
193 case c::cs => { |
197 case c::cs => { |
194 val (r_simp, f_simp) = simp(der(c, r)) |
198 val (r_simp, f_simp) = simp(der(c, r)) |
195 inj(r, c, f_simp(lex_simp(r_simp, cs))) |
199 inj(r, c, f_simp(lex_simp(r_simp, cs))) |
198 |
202 |
199 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) |
203 def lexing_simp(r: Rexp, s: String) : Val = lex_simp(r, s.toList) |
200 |
204 |
201 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab") |
205 lexing_simp(("a" | "ab") ~ ("b" | ""), "ab") |
202 |
206 |
203 // Lexing Rules for a Small While Language |
207 // The Lexing Rules for a Small While Language |
204 |
208 |
205 def PLUS(r: Rexp) = r ~ r.% |
209 def PLUS(r: Rexp) = r ~ r.% |
206 |
210 |
207 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
211 val SYM = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" |
208 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
212 val DIGIT = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
283 println(env(lexing_simp(WHILE_REGS, prog2)).filterNot{_._1 == "w"}.mkString("\n")) |
287 println(env(lexing_simp(WHILE_REGS, prog2)).filterNot{_._1 == "w"}.mkString("\n")) |
284 |
288 |
285 // some more timing tests with |
289 // some more timing tests with |
286 // i copies of the program |
290 // i copies of the program |
287 |
291 |
288 for (i <- 1 to 21 by 10) { |
292 for (i <- 0 to 20 by 10) { |
289 print(i.toString + ": ") |
293 print(i.toString + ": ") |
290 time(lexing_simp(WHILE_REGS, prog2 * i)) |
294 time(lexing_simp(WHILE_REGS, prog2 * i)) |
291 } |
295 } |
292 |
296 |
293 |
297 |