diff -r e85600529ca5 -r 4794759139ea progs/parser1.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/progs/parser1.scala Sat Jun 15 09:23:18 2013 -0400 @@ -0,0 +1,88 @@ +// A naive bottom-up parser with backtracking +// +// Needs: +// :load matcher.scala + +// some regular expressions +val DIGIT = RANGE("0123456789") +val NONZERODIGIT = RANGE("123456789") + +val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") +val LPAREN = CHAR('(') +val RPAREN = CHAR(')') +val WHITESPACE = PLUS(RANGE(" \n")) +val OPS = RANGE("+-*") + +// for classifying the strings that have been recognised + +abstract class Token +case object T_WHITESPACE extends Token +case object T_NUM extends Token +case class T_OP(s: String) extends Token +case object T_LPAREN extends Token +case object T_RPAREN extends Token +case class NT(s: String) extends Token + +// lexing rules for arithmetic expressions +val lexing_rules: List[Rule[Token]]= + List((NUMBER, (s) => T_NUM), + (WHITESPACE, (s) => T_WHITESPACE), + (LPAREN, (s) => T_LPAREN), + (RPAREN, (s) => T_RPAREN), + (OPS, (s) => T_OP(s.mkString))) + +// the tokenizer +val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) + +type Grammar = List[(String, List[Token])] + +// grammar for arithmetic expressions +val grammar = + List ("F" -> List(T_NUM), + "E" -> List(T_NUM), + "E" -> List(NT("E"), T_OP("+"), NT("E")), + "E" -> List(NT("E"), T_OP("-"), NT("E")), + "E" -> List(NT("E"), T_OP("*"), NT("E")), + "E" -> List(T_LPAREN, NT("E"), T_RPAREN)) + + +def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = + ts1 match { + case Nil => None + case t::ts => + if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) + else chop(ts, prefix, t::ts2) + } + +// examples for chop +chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) +chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) + +def replace[A](ts: List[A], out: List[A], in: List [A]) = + chop(ts, out, Nil) match { + case None => None + case Some((before, after)) => Some(before ::: in ::: after) + } + +def parse(g: Grammar, ts: List[Token]) : Boolean = { + println(ts) + if (ts == List(NT("E"))) true + else { + val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) + tss.flatten.exists(parse(g, _)) + } +} + +def parser(g: Grammar, s: String) = { + println("\n") + parse(g, Tok.fromString(s)) +} + + + +parser(grammar, "2 + 3 * 4 + 1") +parser(grammar, "(2 + 3) * (4 + 1)") +parser(grammar, "(2 + 3) * 4 (4 + 1)") + + +