| author | Christian Urban <urbanc@in.tum.de> | 
| Tue, 25 Apr 2017 12:28:07 +0100 | |
| changeset 485 | 21dec9df46ba | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
1  | 
// A naive version of parser combinators producing parse trees  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
2  | 
//  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
3  | 
// Needs  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
4  | 
// :load matcher.scala  | 
| 62 | 5  | 
|
6  | 
// some regular expressions  | 
|
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
7  | 
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
 | 
| 62 | 8  | 
val ID = PLUS(LETTER)  | 
9  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
10  | 
val DIGIT = RANGE("0123456789")
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
11  | 
val NONZERODIGIT = RANGE("123456789")
 | 
| 62 | 12  | 
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")  | 
13  | 
||
14  | 
val LPAREN = CHAR('(')
 | 
|
15  | 
val RPAREN = CHAR(')')
 | 
|
16  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
17  | 
val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
18  | 
val OPS = RANGE("+-*")
 | 
| 62 | 19  | 
|
20  | 
// for classifying the strings that have been recognised  | 
|
21  | 
abstract class Token  | 
|
22  | 
||
23  | 
case object T_WHITESPACE extends Token  | 
|
24  | 
case class T_NUM(s: String) extends Token  | 
|
25  | 
case class T_ID(s: String) extends Token  | 
|
26  | 
case class T_OP(s: String) extends Token  | 
|
27  | 
case object T_LPAREN extends Token  | 
|
28  | 
case object T_RPAREN extends Token  | 
|
29  | 
case object T_IF extends Token  | 
|
30  | 
case object T_THEN extends Token  | 
|
31  | 
case object T_ELSE extends Token  | 
|
32  | 
||
33  | 
// lexing rules for arithmetic expressions  | 
|
34  | 
val lexing_rules: List[Rule[Token]]=  | 
|
35  | 
  List(("if", (s) => T_IF),
 | 
|
36  | 
       ("then", (s) => T_THEN),
 | 
|
37  | 
       ("else", (s) => T_ELSE),
 | 
|
38  | 
(NUMBER, (s) => T_NUM(s.mkString)),  | 
|
39  | 
(ID, (s) => T_ID(s.mkString)),  | 
|
40  | 
(WHITESPACE, (s) => T_WHITESPACE),  | 
|
41  | 
(LPAREN, (s) => T_LPAREN),  | 
|
42  | 
(RPAREN, (s) => T_RPAREN),  | 
|
43  | 
(OPS, (s) => T_OP(s.mkString)))  | 
|
44  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
45  | 
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
46  | 
|
| 62 | 47  | 
|
48  | 
// parse trees  | 
|
49  | 
abstract class ParseTree  | 
|
50  | 
case class Leaf(t: Token) extends ParseTree  | 
|
51  | 
case class Branch(pts: List[ParseTree]) extends ParseTree  | 
|
52  | 
||
53  | 
def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
 | 
|
54  | 
case Leaf(t) => Branch(List(Leaf(t), pt2))  | 
|
55  | 
case Branch(pts) => Branch(pts ++ List(pt2))  | 
|
56  | 
}  | 
|
57  | 
||
58  | 
// parser combinators  | 
|
59  | 
abstract class Parser {
 | 
|
60  | 
def parse(ts: List[Token]): Set[(ParseTree, List[Token])]  | 
|
61  | 
||
62  | 
def parse_all(ts: List[Token]) : Set[ParseTree] =  | 
|
63  | 
for ((head, tail) <- parse(ts); if (tail == Nil)) yield head  | 
|
64  | 
||
65  | 
def || (right : => Parser) : Parser = new AltParser(this, right)  | 
|
66  | 
def ~ (right : => Parser) : Parser = new SeqParser(this, right)  | 
|
67  | 
}  | 
|
68  | 
||
69  | 
class AltParser(p: => Parser, q: => Parser) extends Parser {
 | 
|
70  | 
def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts)  | 
|
71  | 
}  | 
|
72  | 
||
73  | 
class SeqParser(p: => Parser, q: => Parser) extends Parser {
 | 
|
74  | 
def parse(ts: List[Token]) =  | 
|
75  | 
for ((head1, tail1) <- p.parse(ts);  | 
|
76  | 
(head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2)  | 
|
77  | 
}  | 
|
78  | 
||
79  | 
class ListParser(ps: => List[Parser]) extends Parser {
 | 
|
80  | 
  def parse(ts: List[Token]) = ps match {
 | 
|
81  | 
case Nil => Set()  | 
|
82  | 
case p::Nil => p.parse(ts)  | 
|
83  | 
case p::ps =>  | 
|
84  | 
for ((head1, tail1) <- p.parse(ts);  | 
|
85  | 
(head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2)  | 
|
86  | 
}  | 
|
87  | 
}  | 
|
88  | 
||
89  | 
case class TokParser(tok: Token) extends Parser {
 | 
|
90  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
91  | 
case t::ts if (t == tok) => Set((Leaf(t), ts))  | 
|
92  | 
case _ => Set ()  | 
|
93  | 
}  | 
|
94  | 
}  | 
|
95  | 
||
96  | 
implicit def token2tparser(t: Token) = TokParser(t)  | 
|
97  | 
||
98  | 
case object IdParser extends Parser {
 | 
|
99  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
100  | 
case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts))  | 
|
101  | 
case _ => Set ()  | 
|
102  | 
}  | 
|
103  | 
}  | 
|
104  | 
||
105  | 
case object NumParser extends Parser {
 | 
|
106  | 
  def parse(ts: List[Token]) = ts match {
 | 
|
107  | 
case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts))  | 
|
108  | 
case _ => Set ()  | 
|
109  | 
}  | 
|
110  | 
}  | 
|
111  | 
||
112  | 
lazy val E: Parser = (T ~ T_OP("+") ~ E) || T  // start symbol
 | 
|
113  | 
lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
 | 
|
114  | 
lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser  | 
|
115  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
116  | 
println(Tok.fromString("1 + 2 + 3"))
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
117  | 
println(E.parse_all(Tok.fromString("1 + 2 + 3")))
 | 
| 62 | 118  | 
|
119  | 
def eval(t: ParseTree) : Int = t match {
 | 
|
120  | 
case Leaf(T_NUM(n)) => n.toInt  | 
|
121  | 
  case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
 | 
|
122  | 
  case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
 | 
|
123  | 
case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)  | 
|
124  | 
}  | 
|
125  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
126  | 
(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
127  | 
(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
 | 
| 62 | 128  | 
|
129  | 
lazy val EXPR: Parser =  | 
|
130  | 
new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||  | 
|
131  | 
new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||  | 
|
132  | 
IdParser  | 
|
133  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
134  | 
println(EXPR.parse_all(Tok.fromString("if a then b else c")))
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
64 
diff
changeset
 | 
135  | 
println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
 | 
| 62 | 136  | 
|
137  | 
||
138  | 
||
139  |