// A naive version of parser combinators producing parse trees
//
// Needs
// :load matcher.scala
// some regular expressions
// Regular expressions for the lexer, built from the combinators
// (RANGE, PLUS, STAR, SEQ, ALT, CHAR) loaded from matcher.scala.
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
val ID = PLUS(LETTER)  // one or more lowercase letters
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")
// a multi-digit number with no leading zero, or the single digit "0"
// (the bare string "0" presumably relies on an implicit string->regex
// conversion provided by matcher.scala — TODO confirm)
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
val LPAREN = CHAR('(')
val RPAREN = CHAR(')')
val WHITESPACE = PLUS(RANGE(" \n"))  // runs of spaces/newlines
val OPS = RANGE("+-*")               // a single operator character
// for classifying the strings that have been recognised
// Tokens produced by the lexer.  Sealed so the compiler can check
// pattern matches over Token for exhaustiveness (the hierarchy is
// entirely contained in this file).
sealed abstract class Token
case object T_WHITESPACE extends Token              // filtered out by the tokenizer
final case class T_NUM(s: String) extends Token     // numeric literal, digits as lexed
final case class T_ID(s: String) extends Token      // identifier
final case class T_OP(s: String) extends Token      // operator: "+", "-" or "*"
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case object T_IF extends Token
case object T_THEN extends Token
case object T_ELSE extends Token
// lexing rules for arithmetic expressions
// Lexing rules, tried in order: the keyword rules "if"/"then"/"else"
// must precede the more general ID rule so keywords win.
// (Rule and Tokenizer come from matcher.scala; the bare keyword strings
// presumably rely on an implicit string->regex conversion — TODO confirm.)
val lexing_rules: List[Rule[Token]]=
List(("if", (s) => T_IF),
("then", (s) => T_THEN),
("else", (s) => T_ELSE),
(NUMBER, (s) => T_NUM(s.mkString)),
(ID, (s) => T_ID(s.mkString)),
(WHITESPACE, (s) => T_WHITESPACE),
(LPAREN, (s) => T_LPAREN),
(RPAREN, (s) => T_RPAREN),
(OPS, (s) => T_OP(s.mkString)))
// The tokenizer; the second argument lists token classes to discard,
// so whitespace never reaches the parsers.
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
// parse trees
// Parse trees: a Leaf wraps a single token, a Branch holds the
// sub-trees of a sequence.  Sealed for exhaustiveness checking.
sealed abstract class ParseTree
final case class Leaf(t: Token) extends ParseTree
final case class Branch(pts: List[ParseTree]) extends ParseTree

// Appends pt2 to pt1, flattening on the left: a chain of ~ parsers
// therefore yields ONE Branch with all parts as siblings (e.g. a
// 3-element Branch for T ~ op ~ E), not left-nested pairs.  This is
// the shape eval's Branch(List(_, _, _)) patterns rely on.
def combine(pt1: ParseTree, pt2: ParseTree) = pt1 match {
  case Leaf(t) => Branch(List(Leaf(t), pt2))
  case Branch(pts) => Branch(pts ++ List(pt2))
}
// parser combinators
// Interface of all parsers: parse returns every way of consuming a
// prefix of the input, each paired with the tokens left over.
abstract class Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])]

  // Complete parses only: keep just the trees whose leftover is empty.
  def parse_all(ts: List[Token]) : Set[ParseTree] =
    parse(ts).collect { case (tree, Nil) => tree }

  // Choice and sequence combinators; the right-hand side is by-name so
  // grammars can be (mutually) recursive, as in E, T and F below.
  def || (right : => Parser) : Parser = new AltParser(this, right)
  def ~ (right : => Parser) : Parser = new SeqParser(this, right)
}
// Nondeterministic choice: every parse of p together with every parse of q.
class AltParser(p: => Parser, q: => Parser) extends Parser {
  def parse(ts: List[Token]) = {
    val fromLeft = p.parse(ts)
    val fromRight = q.parse(ts)
    fromLeft union fromRight
  }
}
// Sequencing: run p, then run q on p's leftover tokens, and merge the
// two trees with combine (which flattens chains into one Branch).
class SeqParser(p: => Parser, q: => Parser) extends Parser {
  def parse(ts: List[Token]) =
    p.parse(ts).flatMap { case (tree1, rest1) =>
      q.parse(rest1).map { case (tree2, rest2) =>
        (combine(tree1, tree2), rest2)
      }
    }
}
// Sequences a whole list of parsers.  An empty list always fails; a
// singleton delegates directly (no extra Branch); longer lists nest to
// the right: Branch(List(first, Branch(List(second, ...)))).
class ListParser(ps: => List[Parser]) extends Parser {
  def parse(ts: List[Token]) = ps match {
    case Nil => Set()
    case only :: Nil => only.parse(ts)
    case first :: rest =>
      first.parse(ts).flatMap { case (tree1, rest1) =>
        new ListParser(rest).parse(rest1).map { case (tree2, rest2) =>
          (Branch(List(tree1, tree2)), rest2)
        }
      }
  }
}
// Recognises exactly one occurrence of the given token.
case class TokParser(tok: Token) extends Parser {
  def parse(ts: List[Token]) = ts match {
    case head :: rest if head == tok => Set((Leaf(head), rest))
    case _ => Set()
  }
}
// Lets a bare Token appear where a Parser is expected (e.g. T_LPAREN ~ E).
// The result type is written out explicitly: implicit definitions with
// inferred types are a long-standing lint and are rejected by Scala 3.
implicit def token2tparser(t: Token): TokParser = TokParser(t)
// Consumes a single identifier token; fails on anything else.
case object IdParser extends Parser {
  def parse(ts: List[Token]) = ts match {
    case (id @ T_ID(_)) :: rest => Set((Leaf(id), rest))
    case _ => Set()
  }
}
// Consumes a single number token; fails on anything else.
case object NumParser extends Parser {
  def parse(ts: List[Token]) = ts match {
    case (num @ T_NUM(_)) :: rest => Set((Leaf(num), rest))
    case _ => Set()
  }
}
// Grammar for arithmetic expressions.  Right-recursive (E on the right
// of ~), so "+" and "*" group to the right: 1 + 2 + 3 reads as
// 1 + (2 + 3).  T binds "*" tighter than E binds "+", giving the usual
// precedence.  Bare tokens become parsers via token2tparser.
lazy val E: Parser = (T ~ T_OP("+") ~ E) || T // start symbol
lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
// Tok.fromString lexes a string into a token list (see matcher.scala).
println(Tok.fromString("1 + 2 + 3"))
println(E.parse_all(Tok.fromString("1 + 2 + 3")))
// Evaluates an arithmetic parse tree to an Int.  The match is partial
// by design: it covers exactly the shapes the E/T/F grammar produces
// (numbers, flattened 3-element operator Branches, parenthesised
// expressions); any other tree raises a MatchError.
def eval(t: ParseTree) : Int = t match {
  case Leaf(T_NUM(n)) => n.toInt
  case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
  // "-" is lexed by OPS even though the current grammar never emits it;
  // handled here so extending the grammar with subtraction just works.
  case Branch(List(t1, Leaf(T_OP("-")), t2)) => eval(t1) - eval(t2)
  case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
  case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
}
// Evaluate every complete parse of the two expressions.
(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))
// Grammar for if-expressions: if-then and if-then-else, with a plain
// identifier as the base case.  ListParser sequences a whole list of
// parsers at once; the bare tokens rely on token2tparser.
lazy val EXPR: Parser =
new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
IdParser
println(EXPR.parse_all(Tok.fromString("if a then b else c")))
// Dangling else: both alternatives can complete here, so parse_all
// presumably returns a reading for each attachment of the else — the
// classic ambiguity this example is meant to exhibit.
println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))