7 case object ONE extends Rexp |
7 case object ONE extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
8 case class CHAR(c: Char) extends Rexp |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
9 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
10 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
11 case class STAR(r: Rexp) extends Rexp |
11 case class STAR(r: Rexp) extends Rexp |
|
12 case class RECD(x: String, r: Rexp) extends Rexp |
12 |
13 |
13 abstract class ARexp |
14 abstract class ARexp |
14 case object AZERO extends ARexp |
15 case object AZERO extends ARexp |
15 case class AONE(bs: List[Boolean]) extends ARexp |
16 case class AONE(bs: List[Boolean]) extends ARexp |
16 case class ACHAR(bs: List[Boolean], c: Char) extends ARexp |
17 case class ACHAR(bs: List[Boolean], c: Char) extends ARexp |
23 case class Chr(c: Char) extends Val |
24 case class Chr(c: Char) extends Val |
24 case class Sequ(v1: Val, v2: Val) extends Val |
25 case class Sequ(v1: Val, v2: Val) extends Val |
25 case class Left(v: Val) extends Val |
26 case class Left(v: Val) extends Val |
26 case class Right(v: Val) extends Val |
27 case class Right(v: Val) extends Val |
27 case class Stars(vs: List[Val]) extends Val |
28 case class Stars(vs: List[Val]) extends Val |
|
29 case class Rec(x: String, v: Val) extends Val |
28 |
30 |
29 // some convenience for typing in regular expressions |
31 // some convenience for typing in regular expressions |
30 def charlist2rexp(s : List[Char]): Rexp = s match { |
32 def charlist2rexp(s : List[Char]): Rexp = s match { |
31 case Nil => ONE |
33 case Nil => ONE |
32 case c::Nil => CHAR(c) |
34 case c::Nil => CHAR(c) |
44 def | (r: Rexp) = ALT(s, r) |
46 def | (r: Rexp) = ALT(s, r) |
45 def | (r: String) = ALT(s, r) |
47 def | (r: String) = ALT(s, r) |
46 def % = STAR(s) |
48 def % = STAR(s) |
47 def ~ (r: Rexp) = SEQ(s, r) |
49 def ~ (r: Rexp) = SEQ(s, r) |
48 def ~ (r: String) = SEQ(s, r) |
50 def ~ (r: String) = SEQ(s, r) |
|
51 def $ (r: Rexp) = RECD(s, r) |
49 } |
52 } |
50 |
53 |
51 // translation into ARexps |
54 // translation into ARexps |
52 def fuse(bs: List[Boolean], r: ARexp) : ARexp = r match { |
55 def fuse(bs: List[Boolean], r: ARexp) : ARexp = r match { |
53 case AZERO => AZERO |
56 case AZERO => AZERO |
63 case ONE => AONE(Nil) |
66 case ONE => AONE(Nil) |
64 case CHAR(c) => ACHAR(Nil, c) |
67 case CHAR(c) => ACHAR(Nil, c) |
65 case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2))) |
68 case ALT(r1, r2) => AALT(Nil, fuse(List(false), internalise(r1)), fuse(List(true), internalise(r2))) |
66 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
69 case SEQ(r1, r2) => ASEQ(Nil, internalise(r1), internalise(r2)) |
67 case STAR(r) => ASTAR(Nil, internalise(r)) |
70 case STAR(r) => ASTAR(Nil, internalise(r)) |
|
71 case RECD(x, r) => internalise(r) |
68 } |
72 } |
69 |
73 |
70 internalise(("a" | "ab") ~ ("b" | "")) |
74 internalise(("a" | "ab") ~ ("b" | "")) |
71 |
75 |
72 |
76 |
84 case (SEQ(r1, r2), bs) => { |
88 case (SEQ(r1, r2), bs) => { |
85 val (v1, bs1) = decode_aux(r1, bs) |
89 val (v1, bs1) = decode_aux(r1, bs) |
86 val (v2, bs2) = decode_aux(r2, bs1) |
90 val (v2, bs2) = decode_aux(r2, bs1) |
87 (Sequ(v1, v2), bs2) |
91 (Sequ(v1, v2), bs2) |
88 } |
92 } |
89 case (STAR(r), false::bs) => { |
93 case (STAR(r1), false::bs) => { |
90 val (v, bs1) = decode_aux(r, bs) |
94 val (v, bs1) = decode_aux(r1, bs) |
91 val (Stars(vs), bs2) = decode_aux(STAR(r), bs1) |
95 val (Stars(vs), bs2) = decode_aux(STAR(r1), bs1) |
92 (Stars(v::vs), bs2) |
96 (Stars(v::vs), bs2) |
93 } |
97 } |
94 case (STAR(r), true::bs) => (Stars(Nil), bs) |
98 case (STAR(_), true::bs) => (Stars(Nil), bs) |
|
99 case (RECD(x, r1), bs) => { |
|
100 val (v, bs1) = decode_aux(r1, bs) |
|
101 (Rec(x, v), bs1) |
|
102 } |
95 } |
103 } |
96 |
104 |
97 def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match { |
105 def decode(r: Rexp, bs: List[Boolean]) = decode_aux(r, bs) match { |
98 case (v, Nil) => v |
106 case (v, Nil) => v |
99 case _ => throw new Exception("Not decodable") |
107 case _ => throw new Exception("Not decodable") |
143 case c::cs => lex(der(c, r), cs) |
151 case c::cs => lex(der(c, r), cs) |
144 } |
152 } |
145 |
153 |
146 def lexing(r: Rexp, s: String) : Val = decode(r, lex(internalise(r), s.toList)) |
154 def lexing(r: Rexp, s: String) : Val = decode(r, lex(internalise(r), s.toList)) |
147 |
155 |
|
156 |
|
157 |
|
158 def simp(r: ARexp): ARexp = r match { |
|
159 case ASEQ(bs1, r1, r2) => (simp(r1), simp(r2)) match { |
|
160 case (AZERO, _) => AZERO |
|
161 case (_, AZERO) => AZERO |
|
162 case (AONE(bs2), r2s) => fuse(bs1 ++ bs2, r2s) |
|
163 case (r1s, r2s) => ASEQ(bs1, r1s, r2s) |
|
164 } |
|
165 case AALT(bs1, r1, r2) => (simp(r1), simp(r2)) match { |
|
166 case (AZERO, r2s) => fuse(bs1, r2s) |
|
167 case (r1s, AZERO) => fuse(bs1, r1s) |
|
168 case (r1s, r2s) => AALT(bs1, r1s, r2s) |
|
169 } |
|
170 case r => r |
|
171 } |
|
172 |
|
173 def lex_simp(r: ARexp, s: List[Char]) : List[Boolean] = s match { |
|
174 case Nil => if (nullable(r)) mkepsBC(r) else throw new Exception("Not matched") |
|
175 case c::cs => lex(simp(der(c, r)), cs) |
|
176 } |
|
177 |
|
178 def lexing_simp(r: Rexp, s: String) : Val = decode(r, lex_simp(internalise(r), s.toList)) |
|
179 |
|
180 |
|
181 |
|
182 // extracts a string from value |
|
183 def flatten(v: Val) : String = v match { |
|
184 case Empty => "" |
|
185 case Chr(c) => c.toString |
|
186 case Left(v) => flatten(v) |
|
187 case Right(v) => flatten(v) |
|
188 case Sequ(v1, v2) => flatten(v1) + flatten(v2) |
|
189 case Stars(vs) => vs.map(flatten).mkString |
|
190 case Rec(_, v) => flatten(v) |
|
191 } |
|
192 |
|
193 // extracts an environment from a value |
|
194 def env(v: Val) : List[(String, String)] = v match { |
|
195 case Empty => Nil |
|
196 case Chr(c) => Nil |
|
197 case Left(v) => env(v) |
|
198 case Right(v) => env(v) |
|
199 case Sequ(v1, v2) => env(v1) ::: env(v2) |
|
200 case Stars(vs) => vs.flatMap(env) |
|
201 case Rec(x, v) => (x, flatten(v))::env(v) |
|
202 } |
|
203 |
148 // Some Tests |
204 // Some Tests |
149 //============ |
205 //============ |
150 |
206 |
151 def time_needed[T](i: Int, code: => T) = { |
207 def time_needed[T](i: Int, code: => T) = { |
152 val start = System.nanoTime() |
208 val start = System.nanoTime() |
158 |
214 |
159 |
215 |
160 |
216 |
161 val r0 = ("a" | "ab") ~ ("b" | "") |
217 val r0 = ("a" | "ab") ~ ("b" | "") |
162 println(lexing(r0, "ab")) |
218 println(lexing(r0, "ab")) |
|
219 println(lexing_simp(r0, "ab")) |
163 |
220 |
164 val r1 = ("a" | "ab") ~ ("bcd" | "cd") |
221 val r1 = ("a" | "ab") ~ ("bcd" | "cd") |
165 println(lexing(r1, "abcd")) |
222 println(lexing(r1, "abcd")) |
|
223 println(lexing_simp(r1, "abcd")) |
166 |
224 |
167 println(lexing((("" | "a") ~ ("ab" | "b")), "ab")) |
225 println(lexing((("" | "a") ~ ("ab" | "b")), "ab")) |
|
226 println(lexing_simp((("" | "a") ~ ("ab" | "b")), "ab")) |
|
227 |
168 println(lexing((("" | "a") ~ ("b" | "ab")), "ab")) |
228 println(lexing((("" | "a") ~ ("b" | "ab")), "ab")) |
|
229 println(lexing_simp((("" | "a") ~ ("b" | "ab")), "ab")) |
|
230 |
169 println(lexing((("" | "a") ~ ("c" | "ab")), "ab")) |
231 println(lexing((("" | "a") ~ ("c" | "ab")), "ab")) |
170 |
232 println(lexing_simp((("" | "a") ~ ("c" | "ab")), "ab")) |
171 |
233 |
172 |
234 |
173 // Two Simple Tests for the While Language |
235 // Two Simple Tests for the While Language |
174 //======================================== |
236 //======================================== |
175 |
237 |
188 val LPAREN: Rexp = "(" |
250 val LPAREN: Rexp = "(" |
189 val BEGIN: Rexp = "{" |
251 val BEGIN: Rexp = "{" |
190 val END: Rexp = "}" |
252 val END: Rexp = "}" |
191 val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
253 val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
192 |
254 |
193 val WHILE_REGS = (KEYWORD | |
255 val WHILE_REGS = (("k" $ KEYWORD) | |
194 ID | |
256 ("i" $ ID) | |
195 OP | |
257 ("o" $ OP) | |
196 NUM | |
258 ("n" $ NUM) | |
197 SEMI | |
259 ("s" $ SEMI) | |
198 LPAREN | RPAREN | |
260 ("str" $ STRING) | |
199 BEGIN | END | |
261 ("p" $ (LPAREN | RPAREN)) | |
200 WHITESPACE).% |
262 ("b" $ (BEGIN | END)) | |
201 |
263 ("w" $ WHITESPACE)).% |
202 |
264 |
203 println("prog0 test") |
265 println("prog0 test") |
204 |
266 |
205 val prog0 = """read n""" |
267 val prog0 = """read n""" |
206 println(lexing(WHILE_REGS, prog0)) |
268 println(env(lexing(WHILE_REGS, prog0))) |
|
269 println(env(lexing_simp(WHILE_REGS, prog0))) |
207 |
270 |
208 println("prog1 test") |
271 println("prog1 test") |
209 |
272 |
210 val prog1 = """read n; write (n)""" |
273 val prog1 = """read n; write (n)""" |
211 println(lexing(WHILE_REGS, prog1)) |
274 println(env(lexing(WHILE_REGS, prog1))) |
212 |
275 println(env(lexing_simp(WHILE_REGS, prog1))) |
213 |
276 |
|
277 |
|
278 // Sulzmann's tests |
|
279 //================== |
|
280 |
|
281 val sulzmann = ("a" | "b" | "ab").% |
|
282 |
|
283 println(lexing(sulzmann, "a" * 10)) |
|
284 println(lexing_simp(sulzmann, "a" * 10)) |
|
285 |
|
286 for (i <- 1 to 6501 by 500) { |
|
287 println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "a" * i)))) |
|
288 } |
|
289 |
|
290 for (i <- 1 to 16 by 5) { |
|
291 println(i + ": " + "%.5f".format(time_needed(1, lexing_simp(sulzmann, "ab" * i)))) |
|
292 } |