// parser2.scala
// author Christian Urban <christian dot urban at kcl dot ac dot uk>
// Wed, 28 Nov 2012 08:28:26 +0000
// changeset 84 719fd738d2a0
// parent 71 7717f20f0504
// permissions -rw-r--r--
// tuned

// A naive version of parser combinators producing parse trees
//
// Needs
//   :load matcher.scala

// some regular expressions
//
// RANGE, PLUS, ALT, SEQ, STAR and CHAR are not defined in this file;
// presumably they come from matcher.scala (see the ":load" note above).
// The descriptions below assume the usual regular-expression reading of
// those constructors -- TODO confirm against matcher.scala.

// identifiers: built with PLUS from the lower-case letters a-z only
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
val ID = PLUS(LETTER)

// numbers: either a non-zero digit followed by STAR(DIGIT), or the string "0".
// The plain string "0" is passed where the other alternative is a regular
// expression -- presumably an implicit conversion from matcher.scala applies.
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")

// the two parenthesis characters
val LPAREN = CHAR('(')
val RPAREN = CHAR(')')

// whitespace is built from blanks and newlines only (no tab character);
// operators are the single characters +, - and *
val WHITESPACE = PLUS(RANGE(" \n"))
val OPS = RANGE("+-*")

// for classifying the strings that have been recognised
//
// Common supertype of everything the lexing rules below can produce.
abstract class Token

// whitespace; this token is also the one handed to Tokenizer in the
// definition of Tok below
case object T_WHITESPACE extends Token
// a number, an identifier and an operator; each keeps the matched text in s
case class T_NUM(s: String) extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
// opening and closing parenthesis
case object T_LPAREN extends Token
case object T_RPAREN extends Token
// the keywords "if", "then" and "else"
case object T_IF extends Token
case object T_THEN extends Token
case object T_ELSE extends Token

// lexing rules for arithmetic expressions
//
// Each rule pairs a regular expression (or a plain string for the keywords)
// with a function that turns the matched input s into a Token; the calls to
// s.mkString suggest that s is a sequence of characters. Rule is not defined
// in this file -- presumably it comes from matcher.scala.
//
// The keyword rules are listed before the ID rule, although ID also matches
// the strings "if", "then" and "else". NOTE(review): whether list order
// decides such ties depends on Tokenizer -- confirm in matcher.scala.
val lexing_rules: List[Rule[Token]]= 
  List(("if", (s) => T_IF),
       ("then", (s) => T_THEN),
       ("else", (s) => T_ELSE),
       (NUMBER, (s) => T_NUM(s.mkString)),
       (ID, (s) => T_ID(s.mkString)),
       (WHITESPACE, (s) => T_WHITESPACE),
       (LPAREN, (s) => T_LPAREN),
       (RPAREN, (s) => T_RPAREN),
       (OPS, (s) => T_OP(s.mkString)))

// The tokenizer used below via Tok.fromString. The second argument is
// presumably the list of tokens to be left out of the result (here:
// whitespace) -- TODO confirm against Tokenizer in matcher.scala.
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))


// parse trees
//
// A parse tree is either a Leaf holding a single token, or a Branch holding
// an ordered list of sub-trees.
abstract class ParseTree
case class Leaf(t: Token) extends ParseTree
case class Branch(pts: List[ParseTree]) extends ParseTree

// Joins the trees of two parsers that ran one after the other.
//
// If pt1 is a Leaf, the result is a new two-child Branch. If pt1 already is
// a Branch, pt2 is appended to its children, so a chain a ~ b ~ c ends up
// as one Branch with three children.
//
// Note that the append does not distinguish a Branch that is a partial
// sequence from one that is a finished sub-tree: the children of a finished
// left-hand tree are merged into the new Branch as well.
def combine(pt1: ParseTree, pt2: ParseTree) = {
  val children = pt1 match {
    case Branch(existing) => existing :+ pt2
    case first @ Leaf(_) => List(first, pt2)
  }
  Branch(children)
}

// parser combinators
abstract class Parser {
  // Core operation: tries to parse a prefix of ts. Every element of the
  // result pairs a parse tree with the tokens that were not consumed;
  // an empty set means there is no parse.
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])]

  // Only those trees from parse(ts) for which no tokens are left over.
  def parse_all(ts: List[Token]) : Set[ParseTree] =
    parse(ts).collect { case (tree, Nil) => tree }

  // Sequence: this parser followed by right. The argument is by-name so
  // that grammars can refer to themselves.
  def ~ (right : => Parser) : Parser = new SeqParser(this, right)

  // Alternative: the results of this parser together with those of right.
  // The argument is by-name for the same reason as in ~.
  def || (right : => Parser) : Parser = new AltParser(this, right)
}

// Alternative of two parsers: both are run on the same input and their
// result sets are merged. The constructor arguments are by-name, so p and q
// are evaluated only when parse is called.
class AltParser(p: => Parser, q: => Parser) extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] = {
    val fromLeft = p.parse(ts)
    val fromRight = q.parse(ts)
    fromLeft | fromRight
  }
}

// Sequence of two parsers: q is run on whatever every result of p left
// over, and the two trees are joined with combine. The constructor
// arguments are by-name, so p and q are evaluated only when parse is called.
class SeqParser(p: => Parser, q: => Parser) extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] =
    p.parse(ts).flatMap { case (leftTree, afterLeft) =>
      q.parse(afterLeft).map { case (rightTree, afterRight) =>
        (combine(leftTree, rightTree), afterRight)
      }
    }
}

// Runs a list of parsers one after the other.
//
// - an empty list yields no parse at all
// - a single parser is simply run on the input
// - otherwise the first parser is run, and the remaining parsers are run (as
//   a new ListParser) on what it left over; the result is a two-child Branch
//   of the first tree and the tree for the remainder, so longer lists give
//   Branches nested to the right
class ListParser(ps: => List[Parser]) extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] = ps match {
    case Nil => Set.empty
    case List(only) => only.parse(ts)
    case first :: others =>
      val remainder = new ListParser(others)
      first.parse(ts).flatMap { case (firstTree, afterFirst) =>
        remainder.parse(afterFirst).map { case (restTree, afterRest) =>
          (Branch(List(firstTree, restTree)), afterRest)
        }
      }
  }
}

// Parser for exactly one given token: it succeeds with a Leaf if the input
// starts with a token equal to tok, and yields no parse otherwise (this
// includes the empty input).
case class TokParser(tok: Token) extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] =
    ts.headOption match {
      case Some(first) if first == tok => Set((Leaf(first), ts.tail))
      case _ => Set.empty
    }
}

// Lets a Token be written wherever a Parser is expected (as in the
// grammars below) by wrapping it in a TokParser.
// The result type is stated explicitly so that resolving this conversion
// does not depend on inferring it from the body.
implicit def token2tparser(t: Token): TokParser = TokParser(t)

// Parser for any identifier: it succeeds with a Leaf if the input starts
// with a T_ID token, whatever its text, and yields no parse otherwise.
case object IdParser extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] = ts match {
    case (id @ T_ID(_)) :: rest => Set((Leaf(id), rest))
    case _ => Set.empty
  }
}

// Parser for any number: it succeeds with a Leaf if the input starts with a
// T_NUM token, whatever its text, and yields no parse otherwise.
case object NumParser extends Parser {
  def parse(ts: List[Token]): Set[(ParseTree, List[Token])] = ts match {
    case (num @ T_NUM(_)) :: rest => Set((Leaf(num), rest))
    case _ => Set.empty
  }
}

// Grammar for arithmetic expressions over numbers, +, * and parentheses:
//
//   E ::= T + E | T
//   T ::= F * T | F
//   F ::= ( E ) | number
//
// Both operator rules recurse on their right-hand side. The definitions are
// lazy vals because they refer to each other and to themselves. Tokens such
// as T_OP("+") and T_LPAREN are turned into parsers by the implicit
// conversion token2tparser.
lazy val E: Parser = (T ~ T_OP("+") ~ E) || T  // start symbol
lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
   
// print the tokens of a sample input, then the set of its complete parse trees
println(Tok.fromString("1 + 2 + 3"))
println(E.parse_all(Tok.fromString("1 + 2 + 3")))

// Evaluates a parse tree of the arithmetic grammar (numbers, +, * and
// parentheses) to an Int.
//
// combine appends to an existing Branch instead of nesting it. Whenever the
// left operand of + or * is itself a Branch (for example in "2 * 3 + 1" or
// "(1) + 2"), its children therefore arrive merged into the operator's
// child list, as in Branch(List(2, *, 3, +, 1)). The operator and the right
// operand are always the last two children, so the list is split from the
// right and everything in front of the operator is evaluated as the left
// operand.
//
// Throws a MatchError for trees that are not arithmetic expressions (for
// example a Leaf holding an identifier, or an operator other than + and *),
// and a NumberFormatException if the text of a number does not fit an Int.
def eval(t: ParseTree) : Int = t match {
  case Leaf(T_NUM(n)) => n.toInt
  case Branch(pts) => pts.reverse match {
    // seen from the right: right operand, operator, then the left operand
    case right :: Leaf(T_OP(op)) :: leftRev
        if leftRev.nonEmpty && (op == "+" || op == "*") =>
      val left = leftRev.reverse match {
        case single :: Nil => single          // ordinary three-child branch
        case flattened => Branch(flattened)   // left operand merged in by combine
      }
      if (op == "+") eval(left) + eval(right) else eval(left) * eval(right)
    // ( inner ) -- also seen from the right
    case Leaf(T_RPAREN) :: inner :: Leaf(T_LPAREN) :: Nil => eval(inner)
    // report the original tree, not the reversed child list
    case _ => throw new MatchError(t)
  }
  case _ => throw new MatchError(t)
}

// Evaluate every complete parse tree of two sample inputs. The resulting
// sets are neither printed nor stored here.
(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))

// Grammar for conditionals over identifiers:
//
//   EXPR ::= if EXPR then EXPR
//          | if EXPR then EXPR else EXPR
//          | identifier
//
// The grammar is ambiguous: with nested conditionals an "else" can belong
// to either "if", so parse_all can return more than one tree (the second
// example below is such an input).
lazy val EXPR: Parser = 
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) || 
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) || 
  IdParser
 
// print the sets of complete parse trees for two sample inputs
println(EXPR.parse_all(Tok.fromString("if a then b else c")))
println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))