progs/parser2a.scala
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sat, 15 Jun 2013 09:23:18 -0400
changeset 93 4794759139ea
parent 92 scala/parser2a.scala@e85600529ca5
permissions -rw-r--r--
better organised

// Parser combinators including semantic actions
// parses lists of tokens
//
// Needs
//    :load matcher.scala

// regular expressions for the lexical classes
// (RANGE, PLUS, STAR, SEQ, ALT and CHAR are not defined in this file;
//  presumably they come from matcher.scala -- see the note at the top)

// numerals: either a lone "0" or a non-zero digit followed by any digits,
// so numerals with a leading zero such as "007" are not a single NUMBER
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")

// identifiers: one or more lower-case letters
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
val ID = PLUS(LETTER)

// operators and brackets
val OPS = RANGE("+-*")
val LPAREN = CHAR('(')
val RPAREN = CHAR(')')

// whitespace: one or more blanks or newlines
val WHITESPACE = PLUS(RANGE(" \n"))

// tokens: used to classify the strings that the lexer has recognised
abstract class Token

// tokens that keep the text they were built from
case class T_NUM(s: String) extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token

// keywords
case object T_IF extends Token
case object T_THEN extends Token
case object T_ELSE extends Token

// brackets
case object T_LPAREN extends Token
case object T_RPAREN extends Token

// whitespace
case object T_WHITESPACE extends Token

// lexing rules for arithmetic expressions: each rule pairs a regular
// expression with the action that turns the matched text into a token
val lexing_rules: List[Rule[Token]] = {
  // keywords are listed ahead of ID, whose regular expression also matches
  // them (presumably rule order decides such ties -- confirm in Tokenizer)
  val keywords: List[Rule[Token]] =
    List(("if", _ => T_IF),
         ("then", _ => T_THEN),
         ("else", _ => T_ELSE))

  // rules whose token keeps the matched text
  val literals: List[Rule[Token]] =
    List((NUMBER, s => T_NUM(s.mkString)),
         (ID, s => T_ID(s.mkString)))

  // whitespace and brackets carry no text
  val layout: List[Rule[Token]] =
    List((WHITESPACE, _ => T_WHITESPACE),
         (LPAREN, _ => T_LPAREN),
         (RPAREN, _ => T_RPAREN))

  val operators: List[Rule[Token]] =
    List((OPS, s => T_OP(s.mkString)))

  keywords ::: literals ::: layout ::: operators
}

// the tokenizer for these rules; the second argument lists T_WHITESPACE
// (presumably the tokens to be dropped from the output -- confirm in Tokenizer)
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))

// parser combinators with return type T
// (T is the type of the semantic value a successful parse produces)
abstract class Parser[T] {
  // every way of parsing a prefix of ts: each result pairs the computed
  // value with the tokens that were left over; the empty set means failure
  def parse(ts: List[Token]): Set[(T, List[Token])]

  // the values of only those parses that consumed the whole input
  def parse_all(ts: List[Token]) : Set[T] =
    parse(ts).collect { case (value, Nil) => value }

  // alternative: the results of this parser together with those of right
  def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right)

  // semantic action: applies f to every value this parser produces
  def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f)

  // sequence: this parser followed by right, yielding both values as a pair
  def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right)

  // sequence that keeps only the value of right
  def ~>[S] (right : => Parser[S]) : Parser[S] =
    (this ~ right) ==> { case (_, kept) => kept }

  // sequence that keeps only the value of this parser
  def <~[S] (right : => Parser[S]) : Parser[T] =
    (this ~ right) ==> { case (kept, _) => kept }

}

// runs p and then, on whatever p left unconsumed, runs q;
// every combination of a p-result with a q-result gives one result pair
class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
  def parse(sb: List[Token]): Set[((T, S), List[Token])] =
    p.parse(sb).flatMap { case (first, afterFirst) =>
      // q is a by-name argument and is looked up again for each result of p
      q.parse(afterFirst).map { case (second, afterSecond) =>
        ((first, second), afterSecond)
      }
    }
}

// runs both p and q on the same input and returns the results of either
class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
  def parse (sb: List[Token]): Set[(T, List[Token])] = {
    val fromLeft = p.parse(sb)
    val fromRight = q.parse(sb)
    fromLeft union fromRight
  }
}

// runs p and applies f to the value of every result;
// the unconsumed tokens are passed on unchanged
class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
  def parse (sb: List[Token]): Set[(S, List[Token])] =
    p.parse(sb).map { case (value, rest) => (f(value), rest) }
}


// succeeds exactly when the input starts with a token equal to tok;
// the result is that first token together with the rest of the input
case class TokParser(tok: Token) extends Parser[Token] {
  def parse(ts: List[Token]): Set[(Token, List[Token])] =
    if (ts.nonEmpty && ts.head == tok) Set((ts.head, ts.tail))
    else Set.empty
}

// Lets a bare token be written where a parser is expected, e.g. in a
// grammar rule, by wrapping it into a TokParser for that token.
// The result type is stated explicitly: implicit definitions without one
// are rejected by Scala 3 and can be skipped by Scala 2 implicit resolution
// when they are used before their definition.
implicit def token2tparser(t: Token): TokParser = TokParser(t)

// Parser for numbers: succeeds when the input starts with a T_NUM token and
// yields the Int value of its text together with the rest of the input.
// Any other input (including the empty one) gives the empty set.
case object NumParser extends Parser[Int] {
  def parse(ts: List[Token]): Set[(Int, List[Token])] = ts match {
    case T_NUM(digits) :: rest =>
      // toInt throws a NumberFormatException when the text does not denote
      // an Int, e.g. a numeral beyond Int.MaxValue (the NUMBER regular
      // expression puts no bound on the number of digits). Such a token is
      // reported as "no parse" instead of aborting the whole parser.
      scala.util.Try(digits.toInt).toOption.map(n => (n, rest)).toSet
    case _ => Set.empty
  }
}

// grammar for arithmetic expressions; the semantic actions compute the value
//   E ::= T "+" E | T
//   T ::= F "*" T | F
//   F ::= "(" E ")" | number
// The rules refer to each other, hence the lazy vals; the matched operator
// token itself is not needed by the actions and is ignored.
lazy val E: Parser[Int] =
  ((T ~ T_OP("+") ~ E) ==> { case ((lhs, _), rhs) => lhs + rhs }) || T
lazy val T: Parser[Int] =
  ((F ~ T_OP("*") ~ T) ==> { case ((lhs, _), rhs) => lhs * rhs }) || F
lazy val F: Parser[Int] =
  (T_LPAREN ~> E <~ T_RPAREN) || NumParser
   
// examples: print the set of values of each expression
for (input <- List("1 + 2 + 3", "1 + 2 * 3", "(1 + 2) * 3"))
  println(E.parse_all(Tok.fromString(input)))

// Exercise: implement minus
// (the grammar above has rules only for "+" and "*", so these two inputs
//  are not expected to yield a value until minus is added)
for (input <- List("(1 - 2) * 3", "(1 + 2) * - 3"))
  println(E.parse_all(Tok.fromString(input)))