// A naive version of parser combinators producing parse trees
//
// Needs
//   :load matcher.scala

// some regular expressions
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
val ID = PLUS(LETTER)

val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
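// NUMBER matches "0" or any digit sequence without a leading zero,
// for example "0", "5" and "123", but not "042"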

val LPAREN = CHAR('(')
val RPAREN = CHAR(')')

val WHITESPACE = PLUS(RANGE(" \n"))
val OPS = RANGE("+-*")

// for classifying the strings that have been recognised
abstract class Token

case object T_WHITESPACE extends Token
case class T_NUM(s: String) extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case object T_IF extends Token
case object T_THEN extends Token
case object T_ELSE extends Token

// lexing rules for arithmetic expressions
val lexing_rules: List[Rule[Token]] =
  List(("if", (s) => T_IF),
       ("then", (s) => T_THEN),
       ("else", (s) => T_ELSE),
       (NUMBER, (s) => T_NUM(s.mkString)),
       (ID, (s) => T_ID(s.mkString)),
       (WHITESPACE, (s) => T_WHITESPACE),
       (LPAREN, (s) => T_LPAREN),
       (RPAREN, (s) => T_RPAREN),
       (OPS, (s) => T_OP(s.mkString)))

val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
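// a sketch of the expected behaviour (assuming the Tokenizer from
// matcher.scala filters out the tokens listed in its second argument):
// Tok.fromString("if x") should produce List(T_IF, T_ID("x"))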

// parse trees
abstract class ParseTree
case class Leaf(t: Token) extends ParseTree
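// Branch nodes collect the subtrees produced for a grammar rule
// (a minimal definition, inferred from its use in eval below; Parser,
// NumParser, IdParser and ListParser come from the combinator
// definitions omitted here)
case class Branch(pts: List[ParseTree]) extends ParseTree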

lazy val E: Parser = (T ~ T_OP("+") ~ E) || T   // start symbol
lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
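// the grammar encodes the usual precedences: * binds more tightly than
// +, and both are right-recursive, so "1 + 2 + 3" parses as 1 + (2 + 3)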

println(Tok.fromString("1 + 2 + 3"))
println(E.parse_all(Tok.fromString("1 + 2 + 3")))
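// the first println should show the token list, roughly
// List(T_NUM("1"), T_OP("+"), T_NUM("2"), T_OP("+"), T_NUM("3")),
// the second the set of parse trees for the whole input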

def eval(t: ParseTree) : Int = t match {
  case Leaf(T_NUM(n)) => n.toInt
  case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
  case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
  case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
}

(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
(E.parse_all(Tok.fromString("(1 + 2) * 3"))).map(eval(_))
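// assuming matcher.scala behaves as sketched above, these should
// evaluate to Set(6), Set(7) and Set(9) respectively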

lazy val EXPR: Parser =
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
  IdParser

println(EXPR.parse_all(Tok.fromString("if a then b else c")))
println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
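// the second example is the classic dangling-else ambiguity: the
// else-branch can attach to either if, so parse_all should return
// two parse trees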