| author | Christian Urban <urbanc@in.tum.de> | 
| Thu, 26 Sep 2019 11:08:34 +0100 | |
| changeset 634 | c6506d4dc656 | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
1  | 
// A naive bottom-up parser with backtracking  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
2  | 
//  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
3  | 
// Needs:  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
4  | 
// :load matcher.scala  | 
| 50 | 5  | 
|
6  | 
// some regular expressions  | 
|
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
7  | 
val DIGIT = RANGE("0123456789")
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
8  | 
val NONZERODIGIT = RANGE("123456789")
 | 
| 50 | 9  | 
|
10  | 
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")  | 
|
11  | 
val LPAREN = CHAR('(')
 | 
|
12  | 
val RPAREN = CHAR(')')
 | 
|
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
13  | 
val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
14  | 
val OPS = RANGE("+-*")
 | 
| 50 | 15  | 
|
16  | 
// for classifying the strings that have been recognised  | 
|
| 61 | 17  | 
|
| 50 | 18  | 
abstract class Token  | 
19  | 
case object T_WHITESPACE extends Token  | 
|
20  | 
case object T_NUM extends Token  | 
|
21  | 
case class T_OP(s: String) extends Token  | 
|
22  | 
case object T_LPAREN extends Token  | 
|
23  | 
case object T_RPAREN extends Token  | 
|
| 51 | 24  | 
case class NT(s: String) extends Token  | 
| 50 | 25  | 
|
26  | 
// lexing rules for arithmetic expressions  | 
|
| 61 | 27  | 
val lexing_rules: List[Rule[Token]]=  | 
| 50 | 28  | 
List((NUMBER, (s) => T_NUM),  | 
29  | 
(WHITESPACE, (s) => T_WHITESPACE),  | 
|
30  | 
(LPAREN, (s) => T_LPAREN),  | 
|
31  | 
(RPAREN, (s) => T_RPAREN),  | 
|
32  | 
(OPS, (s) => T_OP(s.mkString)))  | 
|
33  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
34  | 
// the tokenizer  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
35  | 
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))  | 
| 50 | 36  | 
|
37  | 
type Grammar = List[(String, List[Token])]  | 
|
38  | 
||
39  | 
// grammar for arithmetic expressions  | 
|
40  | 
val grammar =  | 
|
| 54 | 41  | 
  List ("F" -> List(T_NUM),
 | 
42  | 
"E" -> List(T_NUM),  | 
|
| 51 | 43  | 
        "E" -> List(NT("E"), T_OP("+"), NT("E")),
 | 
44  | 
        "E" -> List(NT("E"), T_OP("-"), NT("E")),
 | 
|
45  | 
        "E" -> List(NT("E"), T_OP("*"), NT("E")),    
 | 
|
46  | 
        "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
 | 
|
| 50 | 47  | 
|
48  | 
||
49  | 
def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =  | 
|
50  | 
  ts1 match {
 | 
|
51  | 
case Nil => None  | 
|
52  | 
case t::ts =>  | 
|
53  | 
if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))  | 
|
54  | 
else chop(ts, prefix, t::ts2)  | 
|
55  | 
}  | 
|
56  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
57  | 
// examples for chop  | 
| 50 | 58  | 
chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)  | 
59  | 
chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)  | 
|
60  | 
||
61  | 
def replace[A](ts: List[A], out: List[A], in: List [A]) =  | 
|
62  | 
  chop(ts, out, Nil) match {
 | 
|
63  | 
case None => None  | 
|
64  | 
case Some((before, after)) => Some(before ::: in ::: after)  | 
|
65  | 
}  | 
|
66  | 
||
| 51 | 67  | 
def parse(g: Grammar, ts: List[Token]) : Boolean = {
 | 
| 54 | 68  | 
println(ts)  | 
| 51 | 69  | 
  if (ts == List(NT("E"))) true
 | 
| 50 | 70  | 
  else {
 | 
| 51 | 71  | 
val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))  | 
72  | 
tss.flatten.exists(parse(g, _))  | 
|
| 50 | 73  | 
}  | 
74  | 
}  | 
|
75  | 
||
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
76  | 
def parser(g: Grammar, s: String) = {
 | 
| 51 | 77  | 
  println("\n")
 | 
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
78  | 
parse(g, Tok.fromString(s))  | 
| 51 | 79  | 
}  | 
80  | 
||
| 50 | 81  | 
|
| 51 | 82  | 
|
| 
71
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
83  | 
parser(grammar, "2 + 3 * 4 + 1")  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
84  | 
parser(grammar, "(2 + 3) * (4 + 1)")  | 
| 
 
7717f20f0504
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
61 
diff
changeset
 | 
85  | 
parser(grammar, "(2 + 3) * 4 (4 + 1)")  | 
| 50 | 86  | 
|
87  | 
||
88  |