5 // |
5 // |
6 // amm lex.sc |
6 // amm lex.sc |
7 // |
7 // |
8 |
8 |
9 |
9 |
10 // regular expressions including records |
10 // regular expressions including recods |
11 abstract class Rexp |
11 // for extracting strings or tokens |
12 case object ZERO extends Rexp |
12 enum Rexp { |
13 case object ONE extends Rexp |
13 case ZERO |
14 case class CHAR(c: Char) extends Rexp |
14 case ONE |
15 case class ALT(r1: Rexp, r2: Rexp) extends Rexp |
15 case CHAR(c: Char) |
16 case class SEQ(r1: Rexp, r2: Rexp) extends Rexp |
16 case ALT(r1: Rexp, r2: Rexp) |
17 case class STAR(r: Rexp) extends Rexp |
17 case SEQ(r1: Rexp, r2: Rexp) |
18 case class RECD(x: String, r: Rexp) extends Rexp |
18 case STAR(r: Rexp) |
19 // records for extracting strings or tokens |
19 case RECD[A](label: A, r: Rexp) |
20 |
20 } |
|
21 import Rexp._ |
|
22 |
21 // values |
23 // values |
22 abstract class Val |
24 enum Val { |
23 case object Empty extends Val |
25 case Empty |
24 case class Chr(c: Char) extends Val |
26 case Chr(c: Char) |
25 case class Sequ(v1: Val, v2: Val) extends Val |
27 case Sequ(v1: Val, v2: Val) |
26 case class Left(v: Val) extends Val |
28 case Left(v: Val) |
27 case class Right(v: Val) extends Val |
29 case Right(v: Val) |
28 case class Stars(vs: List[Val]) extends Val |
30 case Stars(vs: List[Val]) |
29 case class Rec(x: String, v: Val) extends Val |
31 case Rec[A](label: A, v: Val) |
30 |
32 } |
|
33 import Val._ |
|
34 |
31 // some convenience for typing in regular expressions |
35 // some convenience for typing in regular expressions |
|
36 import scala.language.implicitConversions |
32 |
37 |
33 def charlist2rexp(s : List[Char]): Rexp = s match { |
38 def charlist2rexp(s : List[Char]): Rexp = s match { |
34 case Nil => ONE |
39 case Nil => ONE |
35 case c::Nil => CHAR(c) |
40 case c::Nil => CHAR(c) |
36 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
41 case c::s => SEQ(CHAR(c), charlist2rexp(s)) |
89 } |
91 } |
90 |
92 |
91 |
93 |
92 // extracts an environment from a value; |
94 // extracts an environment from a value; |
93 // used for tokenising a string |
95 // used for tokenising a string |
94 def env(v: Val) : List[(String, String)] = v match { |
96 //import scala.reflect.ClassTag |
|
97 |
|
98 def env[A](v: Val) : List[(A, String)] = v match { |
95 case Empty => Nil |
99 case Empty => Nil |
96 case Chr(c) => Nil |
100 case Chr(c) => Nil |
97 case Left(v) => env(v) |
101 case Left(v) => env(v) |
98 case Right(v) => env(v) |
102 case Right(v) => env(v) |
99 case Sequ(v1, v2) => env(v1) ::: env(v2) |
103 case Sequ(v1, v2) => env(v1) ::: env(v2) |
100 case Stars(vs) => vs.flatMap(env) |
104 case Stars(vs) => vs.flatMap(env) |
101 case Rec(x, v) => (x, flatten(v))::env(v) |
105 case Rec[A](x, v) => (x, flatten(v))::env(v) |
102 } |
106 } |
103 |
107 |
104 |
108 |
105 // The injection and mkeps part of the lexer |
109 // The injection and mkeps part of the lexer |
106 //=========================================== |
110 //=========================================== |
107 |
111 |
108 def mkeps(r: Rexp) : Val = r match { |
112 // the pattern-matches are defined to be @unchecked |
|
113 // because they do not need to be defined for |
|
114 // all cases |
|
115 |
|
116 def mkeps(r: Rexp) : Val = (r: @unchecked) match { |
109 case ONE => Empty |
117 case ONE => Empty |
110 case ALT(r1, r2) => |
118 case ALT(r1, r2) => |
111 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
119 if (nullable(r1)) Left(mkeps(r1)) else Right(mkeps(r2)) |
112 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
120 case SEQ(r1, r2) => Sequ(mkeps(r1), mkeps(r2)) |
113 case STAR(r) => Stars(Nil) |
121 case STAR(r) => Stars(Nil) |
114 case RECD(x, r) => Rec(x, mkeps(r)) |
122 case RECD(x, r) => Rec(x, mkeps(r)) |
115 } |
123 } |
116 |
124 |
117 def inj(r: Rexp, c: Char, v: Val) : Val = (r, v) match { |
125 def inj(r: Rexp, c: Char, v: Val) : Val = ((r, v) : @unchecked) match { |
118 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
126 case (STAR(r), Sequ(v1, Stars(vs))) => Stars(inj(r, c, v1)::vs) |
119 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
127 case (SEQ(r1, r2), Sequ(v1, v2)) => Sequ(inj(r1, c, v1), v2) |
120 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
128 case (SEQ(r1, r2), Left(Sequ(v1, v2))) => Sequ(inj(r1, c, v1), v2) |
121 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
129 case (SEQ(r1, r2), Right(v2)) => Sequ(mkeps(r1), inj(r2, c, v2)) |
122 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
130 case (ALT(r1, r2), Left(v1)) => Left(inj(r1, c, v1)) |
130 case Nil => if (nullable(r)) mkeps(r) else |
138 case Nil => if (nullable(r)) mkeps(r) else |
131 { throw new Exception("lexing error") } |
139 { throw new Exception("lexing error") } |
132 case c::cs => inj(r, c, lex(der(c, r), cs)) |
140 case c::cs => inj(r, c, lex(der(c, r), cs)) |
133 } |
141 } |
134 |
142 |
135 def lexing(r: Rexp, s: String) = |
143 |
136 env(lex(r, s.toList)) |
|
137 |
144 |
138 println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) |
145 println(lex(("ab" | "a") ~ (ONE | "b"), "ab".toList)) |
139 |
146 |
140 println(lex(STAR("aa" | "a"), "aaa".toList)) |
147 println(lex(STAR("aa" | "a"), "aaa".toList)) |
|
148 |
|
149 println(lex(STAR(STAR("a")), "aaa".toList)) |
|
150 |
|
151 val re = ("a" | "ab") ~ ("c" | "bc") |
|
152 |
|
153 println(pders1("abc", re).toList.mkString("\n")) |
|
154 pders('a', pder('a', re)))) |
|
155 draw(simp(der('a', der('a', der('a', re))))) |
|
156 |
|
157 size(simp(ders(, re))) |
|
158 size(simp(der('a', der('a', re)))) |
|
159 size(simp(der('a', der('a', der('a', re))))) |
|
160 |
|
161 |
|
162 lex(re, "aaaaa".toList) |
141 |
163 |
142 // The Lexing Rules for the WHILE Language |
164 // The Lexing Rules for the WHILE Language |
143 |
165 |
144 def PLUS(r: Rexp) = r ~ r.% |
166 def PLUS(r: Rexp) = r ~ r.% |
145 |
167 |
161 val RPAREN: Rexp = "}" |
183 val RPAREN: Rexp = "}" |
162 val LPAREN: Rexp = "{" |
184 val LPAREN: Rexp = "{" |
163 val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
185 val STRING: Rexp = "\"" ~ SYM.% ~ "\"" |
164 |
186 |
165 |
187 |
166 val WHILE_REGS = (("k" & KEYWORD) | |
188 enum TAGS { |
167 ("i" & ID) | |
189 case Key, Id, Op, Num, Semi, Str, Paren, Wht |
168 ("o" & OP) | |
190 } |
169 ("n" & NUM) | |
191 import TAGS._ |
170 ("s" & SEMI) | |
192 |
171 ("str" & STRING) | |
193 extension (t: TAGS) { |
172 ("p" & (LPAREN | RPAREN)) | |
194 def & (r: Rexp) = RECD[TAGS](t, r) |
173 ("w" & WHITESPACE)).% |
195 } |
174 |
196 |
175 val KY : Rexp = "if" | "read" | "write" |
197 def lexing(r: Rexp, s: String) = |
176 val WH : Rexp = " " | "\n" |
198 env[TAGS](lex(r, s.toList)) |
177 |
199 |
178 val TRIV_REGS = (("k" & KY) | |
200 val WHILE_REGS = ((Key & KEYWORD) | |
179 ("w" & WHITESPACE)).% |
201 (Id & ID) | |
|
202 (Op & OP) | |
|
203 (Num & NUM) | |
|
204 (Semi & SEMI) | |
|
205 (Str & STRING) | |
|
206 (Paren & (LPAREN | RPAREN)) | |
|
207 (Wht & WHITESPACE)).% |
|
208 |
180 |
209 |
181 // Two Simple While Tests |
210 // Two Simple While Tests |
182 //======================== |
211 //======================== |
183 |
212 |
184 //@arg(doc = "small tests") |
|
185 @main |
213 @main |
186 def small() = { |
214 def small() = { |
187 |
215 |
188 val prog0 = """if""" |
216 val prog0 = """if""" |
189 println(s"test: $prog0") |
217 println(s"test: $prog0") |