| author | Christian Urban <christian dot urban at kcl dot ac dot uk> | 
| Tue, 18 Oct 2016 20:39:54 +0100 | |
| changeset 456 | 4abd90760ffe | 
| parent 93 | 4794759139ea | 
| permissions | -rw-r--r-- | 
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 1 | // A naive version of parser combinators producing parse trees | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 2 | // | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 3 | // Needs | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 4 | // :load matcher.scala | 
| 62 | 5 | |
| 6 | // some regular expressions | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 7 | val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
 | 
| 62 | 8 | val ID = PLUS(LETTER) | 
| 9 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 10 | val DIGIT = RANGE("0123456789")
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 11 | val NONZERODIGIT = RANGE("123456789")
 | 
| 62 | 12 | val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") | 
| 13 | ||
| 14 | val LPAREN = CHAR('(')
 | |
| 15 | val RPAREN = CHAR(')')
 | |
| 16 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 17 | val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 18 | val OPS = RANGE("+-*")
 | 
| 62 | 19 | |
| 20 | // for classifying the strings that have been recognised | |
| 21 | abstract class Token | |
| 22 | ||
| 23 | case object T_WHITESPACE extends Token | |
| 24 | case class T_NUM(s: String) extends Token | |
| 25 | case class T_ID(s: String) extends Token | |
| 26 | case class T_OP(s: String) extends Token | |
| 27 | case object T_LPAREN extends Token | |
| 28 | case object T_RPAREN extends Token | |
| 29 | case object T_IF extends Token | |
| 30 | case object T_THEN extends Token | |
| 31 | case object T_ELSE extends Token | |
| 32 | ||
| 33 | // lexing rules for arithmetic expressions | |
| 34 | val lexing_rules: List[Rule[Token]]= | |
| 35 |   List(("if", (s) => T_IF),
 | |
| 36 |        ("then", (s) => T_THEN),
 | |
| 37 |        ("else", (s) => T_ELSE),
 | |
| 38 | (NUMBER, (s) => T_NUM(s.mkString)), | |
| 39 | (ID, (s) => T_ID(s.mkString)), | |
| 40 | (WHITESPACE, (s) => T_WHITESPACE), | |
| 41 | (LPAREN, (s) => T_LPAREN), | |
| 42 | (RPAREN, (s) => T_RPAREN), | |
| 43 | (OPS, (s) => T_OP(s.mkString))) | |
| 44 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 45 | val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 46 | |
| 62 | 47 | |
| 48 | // parse trees | |
| 49 | abstract class ParseTree | |
| 50 | case class Leaf(t: Token) extends ParseTree | |
| 51 | case class Branch(pts: List[ParseTree]) extends ParseTree | |
| 52 | ||
| 53 | def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
 | |
| 54 | case Leaf(t) => Branch(List(Leaf(t), pt2)) | |
| 55 | case Branch(pts) => Branch(pts ++ List(pt2)) | |
| 56 | } | |
| 57 | ||
| 58 | // parser combinators | |
| 59 | abstract class Parser {
 | |
| 60 | def parse(ts: List[Token]): Set[(ParseTree, List[Token])] | |
| 61 | ||
| 62 | def parse_all(ts: List[Token]) : Set[ParseTree] = | |
| 63 | for ((head, tail) <- parse(ts); if (tail == Nil)) yield head | |
| 64 | ||
| 65 | def || (right : => Parser) : Parser = new AltParser(this, right) | |
| 66 | def ~ (right : => Parser) : Parser = new SeqParser(this, right) | |
| 67 | } | |
| 68 | ||
| 69 | class AltParser(p: => Parser, q: => Parser) extends Parser {
 | |
| 70 | def parse (ts: List[Token]) = p.parse(ts) ++ q.parse(ts) | |
| 71 | } | |
| 72 | ||
| 73 | class SeqParser(p: => Parser, q: => Parser) extends Parser {
 | |
| 74 | def parse(ts: List[Token]) = | |
| 75 | for ((head1, tail1) <- p.parse(ts); | |
| 76 | (head2, tail2) <- q.parse(tail1)) yield (combine(head1, head2), tail2) | |
| 77 | } | |
| 78 | ||
| 79 | class ListParser(ps: => List[Parser]) extends Parser {
 | |
| 80 |   def parse(ts: List[Token]) = ps match {
 | |
| 81 | case Nil => Set() | |
| 82 | case p::Nil => p.parse(ts) | |
| 83 | case p::ps => | |
| 84 | for ((head1, tail1) <- p.parse(ts); | |
| 85 | (head2, tail2) <- new ListParser(ps).parse(tail1)) yield (Branch(List(head1, head2)), tail2) | |
| 86 | } | |
| 87 | } | |
| 88 | ||
| 89 | case class TokParser(tok: Token) extends Parser {
 | |
| 90 |   def parse(ts: List[Token]) = ts match {
 | |
| 91 | case t::ts if (t == tok) => Set((Leaf(t), ts)) | |
| 92 | case _ => Set () | |
| 93 | } | |
| 94 | } | |
| 95 | ||
| 96 | implicit def token2tparser(t: Token) = TokParser(t) | |
| 97 | ||
| 98 | case object IdParser extends Parser {
 | |
| 99 |   def parse(ts: List[Token]) = ts match {
 | |
| 100 | case T_ID(s)::ts => Set((Leaf(T_ID(s)), ts)) | |
| 101 | case _ => Set () | |
| 102 | } | |
| 103 | } | |
| 104 | ||
| 105 | case object NumParser extends Parser {
 | |
| 106 |   def parse(ts: List[Token]) = ts match {
 | |
| 107 | case T_NUM(s)::ts => Set((Leaf(T_NUM(s)), ts)) | |
| 108 | case _ => Set () | |
| 109 | } | |
| 110 | } | |
| 111 | ||
| 112 | lazy val E: Parser = (T ~ T_OP("+") ~ E) || T  // start symbol
 | |
| 113 | lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
 | |
| 114 | lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser | |
| 115 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 116 | println(Tok.fromString("1 + 2 + 3"))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 117 | println(E.parse_all(Tok.fromString("1 + 2 + 3")))
 | 
| 62 | 118 | |
| 119 | def eval(t: ParseTree) : Int = t match {
 | |
| 120 | case Leaf(T_NUM(n)) => n.toInt | |
| 121 |   case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
 | |
| 122 |   case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
 | |
| 123 | case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t) | |
| 124 | } | |
| 125 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 126 | (E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 127 | (E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
 | 
| 62 | 128 | |
| 129 | lazy val EXPR: Parser = | |
| 130 | new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || | |
| 131 | new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || | |
| 132 | IdParser | |
| 133 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 134 | println(EXPR.parse_all(Tok.fromString("if a then b else c")))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 135 | println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))
 | 
| 62 | 136 | |
| 137 | ||
| 138 | ||
| 139 |