1 // A parser and interpreter for the While language |
1 // Parser Combinators: Simple Version |
2 // |
2 //==================================== |
3 |
3 // |
4 import scala.language.implicitConversions |
4 // Call with |
5 import scala.language.reflectiveCalls |
5 // |
6 |
6 // amm comb1.sc |
7 |
7 |
8 // more convenience for the semantic actions later on |
8 |
9 case class ~[+A, +B](_1: A, _2: B) |
9 // Note, in the lectures I did not show the implicit type constraint |
|
10 // I : IsSeq, which means that the input type 'I' needs to be |
|
11 // a sequence. |
10 |
12 |
11 type IsSeq[A] = A => Seq[_] |
13 type IsSeq[A] = A => Seq[_] |
12 |
14 |
13 abstract class Parser[I : IsSeq, T] { |
15 abstract class Parser[I : IsSeq, T]{ |
14 def parse(ts: I): Set[(T, I)] |
16 def parse(in: I): Set[(T, I)] |
15 |
17 |
16 def parse_all(ts: I) : Set[T] = |
18 def parse_all(in: I) : Set[T] = |
17 for ((head, tail) <- parse(ts); if tail.isEmpty) yield head |
19 for ((hd, tl) <- parse(in); |
18 } |
20 if tl.isEmpty) yield hd |
19 |
21 } |
20 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] { |
22 |
21 def parse(sb: I) = |
23 // parser combinators |
22 for ((head1, tail1) <- p.parse(sb); |
24 |
23 (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) |
25 // alternative parser |
24 } |
26 class AltParser[I : IsSeq, T](p: => Parser[I, T], |
25 |
27 q: => Parser[I, T]) extends Parser[I, T] { |
26 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { |
28 def parse(in: I) = p.parse(in) ++ q.parse(in) |
27 def parse(sb: I) = p.parse(sb) ++ q.parse(sb) |
29 } |
28 } |
30 |
29 |
31 // sequence parser |
30 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { |
32 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], |
31 def parse(sb: I) = |
33 q: => Parser[I, S]) extends Parser[I, (T, S)] { |
32 for ((head, tail) <- p.parse(sb)) yield (f(head), tail) |
34 def parse(in: I) = |
33 } |
35 for ((hd1, tl1) <- p.parse(in); |
34 |
36 (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2) |
35 case class StrParser(s: String) extends Parser[String, String] { |
37 } |
36 def parse(sb: String) = { |
38 |
37 val (prefix, suffix) = sb.splitAt(s.length) |
39 // map parser |
38 if (prefix == s) Set((prefix, suffix)) else Set() |
40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], |
|
41 f: T => S) extends Parser[I, S] { |
|
42 def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) |
|
43 } |
|
44 |
|
45 |
|
46 |
|
47 // an example of an atomic parser for characters |
|
48 case class CharParser(c: Char) extends Parser[String, Char] { |
|
49 def parse(in: String) = |
|
50 if (in != "" && in.head == c) Set((c, in.tail)) else Set() |
|
51 } |
|
52 |
|
53 |
|
54 // an atomic parser for parsing strings according to a regex |
|
55 import scala.util.matching.Regex |
|
56 |
|
57 case class RegexParser(reg: Regex) extends Parser[String, String] { |
|
58 def parse(in: String) = reg.findPrefixMatchOf(in) match { |
|
59 case None => Set() |
|
60 case Some(m) => Set((m.matched, m.after.toString)) |
39 } |
61 } |
40 } |
62 } |
41 |
63 |
42 case object NumParser extends Parser[String, Int] { |
64 // atomic parsers for numbers and "verbatim" strings |
43 val reg = "[0-9]+".r |
65 val NumParser = RegexParser("[0-9]+".r) |
44 def parse(sb: String) = reg.findPrefixOf(sb) match { |
66 def StrParser(s: String) = RegexParser(Regex.quote(s).r) |
45 case None => Set() |
67 |
46 case Some(s) => { |
68 |
47 val (head, tail) = sb.splitAt(s.length) |
69 |
48 Set((head.toInt, tail)) |
70 // NumParserInt transforms a "string integer" into a propper Int |
49 } |
71 // (needs "new" because MapParser is not a case class) |
50 } |
72 |
51 } |
73 val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt) |
52 |
74 |
|
75 |
|
76 // the following string interpolation allows us to write |
|
77 // StrParser(_some_string_) more conveniently as |
|
78 // |
|
79 // p"<_some_string_>" |
53 |
80 |
54 implicit def parser_interpolation(sc: StringContext) = new { |
81 implicit def parser_interpolation(sc: StringContext) = new { |
55 def p(args: Any*) = StrParser(sc.s(args:_*)) |
82 def p(args: Any*) = StrParser(sc.s(args:_*)) |
56 } |
83 } |
57 |
84 |
58 // this string interpolation allows us to write |
85 |
59 // things like the following for a StrParser |
86 // more convenient syntax for parser combinators |
60 // |
|
61 // p"<_some_string_>" |
|
62 // |
|
63 // instead of StrParser(<_some_string_>) |
|
64 |
|
65 |
|
66 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { |
87 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { |
67 def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) |
88 def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) |
68 def ~[S](q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
89 def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) |
69 def map[S](f: => T => S) = new MapParser[I, T, S](p, f) |
90 def map[S](f: => T => S) = new MapParser[I, T, S](p, f) |
70 } |
91 } |
71 |
92 |
72 // these implicits allow us to use infic notation for |
93 // these implicits allow us to use an infix notation for |
73 // sequences and alternatives; we also can write map |
94 // sequences and alternatives; we also can write the usual |
74 // for a parser |
95 // map for a MapParser |
75 |
96 |
76 |
97 |
77 // the abstract syntax trees for the WHILE language |
98 // with this NumParserInt can now be written more conveniently |
78 abstract class Stmt |
99 // as: |
79 abstract class AExp |
100 |
80 abstract class BExp |
101 val NumParserInt2 = NumParser.map(_.toInt) |
81 |
102 |
82 type Block = List[Stmt] |
103 |
83 |
104 // A parser for palindromes (just returns them as string) |
84 case object Skip extends Stmt |
105 lazy val Pal : Parser[String, String] = { |
85 case class If(a: BExp, bl1: Block, bl2: Block) extends Stmt |
106 (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || |
86 case class While(b: BExp, bl: Block) extends Stmt |
107 (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || |
87 case class Assign(s: String, a: AExp) extends Stmt |
108 p"a" || p"b" || p"" |
88 case class Write(s: String) extends Stmt |
109 } |
89 |
110 |
90 |
111 // examples |
91 case class Var(s: String) extends AExp |
112 Pal.parse_all("abaaaba") |
92 case class Num(i: Int) extends AExp |
113 Pal.parse("abaaaba") |
93 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp |
114 |
94 |
115 println("Palindrome: " + Pal.parse_all("abaaaba")) |
95 case object True extends BExp |
116 |
96 case object False extends BExp |
117 // A parser for wellnested parentheses |
97 case class Bop(o: String, a1: AExp, a2: AExp) extends BExp |
118 // |
98 case class And(b1: BExp, b2: BExp) extends BExp |
119 // P ::= ( P ) P | epsilon |
99 case class Or(b1: BExp, b2: BExp) extends BExp |
120 // |
100 |
121 // (transforms '(' -> '{' , ')' -> '}' ) |
101 case object IdParser extends Parser[String, String] { |
122 lazy val P : Parser[String, String] = { |
102 val reg = "[a-z][a-z,0-9]*".r |
123 (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || |
103 def parse(sb: String) = reg.findPrefixOf(sb) match { |
124 p"" |
104 case None => Set() |
125 } |
105 case Some(s) => Set(sb.splitAt(s.length)) |
126 |
106 } |
127 println(P.parse_all("(((()()))())")) |
107 } |
128 println(P.parse_all("(((()()))()))")) |
108 |
129 println(P.parse_all(")(")) |
109 // arithmetic expressions |
130 println(P.parse_all("()")) |
110 lazy val AExp: Parser[String, AExp] = |
131 |
111 (Te ~ p"+" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } || |
132 // A parser for arithmetic expressions (Terms and Factors) |
112 (Te ~ p"-" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te |
133 |
113 lazy val Te: Parser[String, AExp] = |
134 lazy val E: Parser[String, Int] = { |
114 (Fa ~ p"*" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } || |
135 (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } || |
115 (Fa ~ p"/" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa |
136 (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T } |
116 lazy val Fa: Parser[String, AExp] = |
137 lazy val T: Parser[String, Int] = { |
117 (p"(" ~ AExp ~ p")").map{ case _ ~ y ~ _ => y } || |
138 (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F } |
118 IdParser.map(Var) || |
139 lazy val F: Parser[String, Int] = { |
119 NumParser.map(Num) |
140 (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt } |
120 |
141 |
121 // boolean expressions with some simple nesting |
142 println(E.parse_all("1+3+4")) |
122 lazy val BExp: Parser[String, BExp] = |
143 println(E.parse("1+3+4")) |
123 (AExp ~ p"==" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } || |
144 println(E.parse_all("4*2+3")) |
124 (AExp ~ p"!=" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } || |
145 println(E.parse_all("4*(2+3)")) |
125 (AExp ~ p"<" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } || |
146 println(E.parse_all("(4)*((2+3))")) |
126 (AExp ~ p">" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } || |
147 println(E.parse_all("4/2+3")) |
127 (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } || |
148 println(E.parse("1 + 2 * 3")) |
128 (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } || |
149 println(E.parse_all("(1+2)+3")) |
129 (p"true".map[BExp]{ _ => True }) || |
150 println(E.parse_all("1+2+3")) |
130 (p"false".map[BExp]{ _ => False }) || |
151 |
131 (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x } |
152 |
132 |
153 // with parser combinators (and other parsing algorithms) |
133 // statement / statements |
154 // no left-recursion is allowed, otherwise the will loop |
134 lazy val Stmt: Parser[String, Stmt] = |
155 |
135 ((p"skip".map[Stmt]{_ => Skip }) || |
156 lazy val EL: Parser[String, Int] = |
136 (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } || |
157 ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || |
137 (p"write(" ~ IdParser ~ p")").map[Stmt]{ case _ ~ y ~ _ => Write(y) } || |
158 (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} || |
138 (p"if" ~ BExp ~ p"then" ~ Block ~ p"else" ~ Block) |
159 (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} || |
139 .map[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } || |
160 NumParserInt) |
140 (p"while" ~ BExp ~ p"do" ~ Block).map[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) }) |
161 |
141 |
162 // this will run forever: |
142 lazy val Stmts: Parser[String, Block] = |
163 //println(EL.parse_all("1+2+3")) |
143 (Stmt ~ p";" ~ Stmts).map[Block]{ case x ~ _ ~ z => x :: z } || |
164 |
144 (Stmt.map[Block]{ s => List(s) }) |
165 |
145 |
166 // non-ambiguous vs ambiguous grammars |
146 // blocks (enclosed in curly braces) |
167 |
147 lazy val Block: Parser[String, Block] = |
168 // ambiguous |
148 ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } || |
169 lazy val S : Parser[String, String] = |
149 (Stmt.map(s => List(s)))) |
170 (p"1" ~ S ~ S).map{ case ((x, y), z) => x + y + z } || p"" |
150 |
171 |
151 |
172 //println(time(S.parse("1" * 10))) |
152 Stmts.parse_all("x2:=5+3;") |
173 //println(time(S.parse_all("1" * 10))) |
153 Block.parse_all("{x:=5;y:=8}") |
174 |
154 Block.parse_all("if(false)then{x:=5}else{x:=10}") |
175 // non-ambiguous |
155 |
176 lazy val U : Parser[String, String] = |
156 val fib = """n := 10; |
177 (p"1" ~ U).map{ case (x, y) => x + y } || p"" |
157 minus1 := 0; |
178 |
158 minus2 := 1; |
179 //println(time(U.parse("1" * 10))) |
159 temp := 0; |
180 //println(time(U.parse_all("1" * 10))) |
160 while (n > 0) do { |
181 println(U.parse("1" * 25)) |
161 temp := minus2; |
182 |
162 minus2 := minus1 + minus2; |
183 U.parse("11") |
163 minus1 := temp; |
184 U.parse("11111") |
164 n := n - 1 |
185 U.parse("11011") |
165 }; |
186 |
166 result := minus2""".replaceAll("\\s+", "") |
187 U.parse_all("1" * 100) |
167 |
188 U.parse_all("1" * 100 + "0") |
168 Stmts.parse_all(fib) |
189 |
169 |
190 // you can see the difference in second example |
170 |
191 //S.parse_all("1" * 100) // succeeds |
171 // an interpreter for the WHILE language |
192 //S.parse_all("1" * 100 + "0") // fails |
172 type Env = Map[String, Int] |
193 |
173 |
194 |
174 def eval_aexp(a: AExp, env: Env) : Int = a match { |
195 // A variant which counts how many 1s are parsed |
175 case Num(i) => i |
196 lazy val UCount : Parser[String, Int] = |
176 case Var(s) => env(s) |
197 (p"1" ~ UCount).map{ case (_, y) => y + 1 } || p"".map{ _ => 0 } |
177 case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env) |
198 |
178 case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env) |
199 println(UCount.parse("11111")) |
179 case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env) |
200 println(UCount.parse_all("11111")) |
180 case Aop("/", a1, a2) => eval_aexp(a1, env) / eval_aexp(a2, env) |
201 |
181 } |
202 // Two single character parsers |
182 |
203 lazy val One : Parser[String, String] = p"a" |
183 def eval_bexp(b: BExp, env: Env) : Boolean = b match { |
204 lazy val Two : Parser[String, String] = p"b" |
184 case True => true |
205 |
185 case False => false |
206 One.parse("a") |
186 case Bop("==", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env) |
207 One.parse("aaa") |
187 case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env)) |
208 |
188 case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env) |
209 // note how the pairs nest to the left with sequence parsers |
189 case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env) |
210 (One ~ One).parse("aaa") |
190 case And(b1, b2) => eval_bexp(b1, env) && eval_bexp(b2, env) |
211 (One ~ One ~ One).parse("aaa") |
191 case Or(b1, b2) => eval_bexp(b1, env) || eval_bexp(b2, env) |
212 (One ~ One ~ One ~ One).parse("aaaa") |
192 } |
213 |
193 |
214 (One || Two).parse("aaa") |
194 def eval_stmt(s: Stmt, env: Env) : Env = s match { |
215 |
195 case Skip => env |
216 |
196 case Assign(x, a) => env + (x -> eval_aexp(a, env)) |
217 |
197 case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) |
218 // a problem with the arithmetic expression parser: it |
198 case While(b, bl) => |
219 // gets very slow with deeply nested parentheses |
199 if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env)) |
220 |
200 else env |
221 println("Runtime problem") |
201 case Write(x) => { println(env(x)) ; env } |
222 println(E.parse("1")) |
202 } |
223 println(E.parse("(1)")) |
203 |
224 println(E.parse("((1))")) |
204 def eval_bl(bl: Block, env: Env) : Env = bl match { |
225 //println(E.parse("(((1)))")) |
205 case Nil => env |
226 //println(E.parse("((((1))))")) |
206 case s::bl => eval_bl(bl, eval_stmt(s, env)) |
227 //println(E.parse("((((((1))))))")) |
207 } |
228 //println(E.parse("(((((((1)))))))")) |
208 |
229 //println(E.parse("((((((((1)))))))")) |
209 def eval(bl: Block) : Env = eval_bl(bl, Map()) |
|
210 |
|
211 // parse + evaluate fib program; then lookup what is |
|
212 // stored under the variable result |
|
213 println(eval(Stmts.parse_all(fib).head)("result")) |
|
214 |
|
215 |
|
216 // more examles |
|
217 |
|
218 // calculate and print all factors bigger |
|
219 // than 1 and smaller than n |
|
220 println("Factors") |
|
221 |
|
222 val factors = |
|
223 """n := 12; |
|
224 f := 2; |
|
225 while (f < n / 2 + 1) do { |
|
226 if ((n / f) * f == n) then { write(f) } else { skip }; |
|
227 f := f + 1 |
|
228 }""".replaceAll("\\s+", "") |
|
229 |
|
230 eval(Stmts.parse_all(factors).head) |
|
231 |
|
232 // calculate all prime numbers up to a number |
|
233 println("Primes") |
|
234 |
|
235 val primes = |
|
236 """end := 100; |
|
237 n := 2; |
|
238 while (n < end) do { |
|
239 f := 2; |
|
240 tmp := 0; |
|
241 while ((f < n / 2 + 1) && (tmp == 0)) do { |
|
242 if ((n / f) * f == n) then { tmp := 1 } else { skip }; |
|
243 f := f + 1 |
|
244 }; |
|
245 if (tmp == 0) then { write(n) } else { skip }; |
|
246 n := n + 1 |
|
247 }""".replaceAll("\\s+", "") |
|
248 |
|
249 eval(Stmts.parse_all(primes).head) |
|