// A naive bottom-up parser with backtracking
//
// Needs:
//   :load matcher.scala

// some regular expressions
val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")

val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
val LPAREN = CHAR('(')
val RPAREN = CHAR(')')
val WHITESPACE = PLUS(RANGE(" \n"))
val OPS = RANGE("+-*")
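
// NUMBER matches a non-zero digit followed by arbitrarily many digits, or the
// single digit 0 (so no leading zeros); the literal "0" stands for a regular
// expression here, presumably via an implicit coercion from matcher.scala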

// for classifying the strings that have been recognised

abstract class Token
case object T_WHITESPACE extends Token
case object T_NUM extends Token
case class T_OP(s: String) extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case class NT(s: String) extends Token

// lexing rules for arithmetic expressions
val lexing_rules: List[Rule[Token]] =
  List((NUMBER, (s) => T_NUM),
       (WHITESPACE, (s) => T_WHITESPACE),
       (LPAREN, (s) => T_LPAREN),
       (RPAREN, (s) => T_RPAREN),
       (OPS, (s) => T_OP(s.mkString)))

// the tokenizer
val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
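
// presumably (see matcher.scala) the second argument lists the token classes
// that are filtered out; for example Tok.fromString("2 + 3") should then
// yield List(T_NUM, T_OP("+"), T_NUM) with the whitespace removed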

type Grammar = List[(String, List[Token])]

// grammar for arithmetic expressions
val grammar =
  List("F" -> List(T_NUM),
       "E" -> List(T_NUM),
       "E" -> List(NT("E"), T_OP("+"), NT("E")),
       "E" -> List(NT("E"), T_OP("-"), NT("E")),
       "E" -> List(NT("E"), T_OP("*"), NT("E")),
       "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
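
// note: the grammar is ambiguous (no precedences for +, - and *); also, the
// F-rule can never contribute to a successful parse, since NT("F") does not
// appear on any right-hand side, so it only gives the backtracking search
// additional dead-end choices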

def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] =
  ts1 match {
    case Nil => None
    case t::ts =>
      if (ts1.startsWith(prefix)) Some((ts2.reverse, ts1.drop(prefix.length)))
      else chop(ts, prefix, t::ts2)
  }

// examples for chop
chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
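// the first call should return Some((List(1, 2, 3), List(6, 7, 8, 9))),
// the second None, because 3, 5 never occurs as a contiguous sublist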

def replace[A](ts: List[A], out: List[A], in: List[A]) =
  chop(ts, out, Nil) match {
    case None => None
    case Some((before, after)) => Some(before ::: in ::: after)
  }
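
// an illustrative example for replace (not in the original file): rewriting
// the first occurrence of the handle T_NUM into the nonterminal NT("E")
replace(List(T_NUM, T_OP("+"), T_NUM), List(T_NUM), List(NT("E")))
// should give Some(List(NT("E"), T_OP("+"), T_NUM))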

def parse(g: Grammar, ts: List[Token]) : Boolean = {
  println(ts)
  if (ts == List(NT("E"))) true
  else {
    val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
    tss.flatten.exists(parse(g, _))
  }
}
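
// Not in the original file: a sketch of a variant of parse that also returns
// one successful reduction sequence (the list of sentential forms), using the
// same naive backtracking search over the grammar rules as parse above.
def parse_trace(g: Grammar, ts: List[Token]) : Option[List[List[Token]]] =
  if (ts == List(NT("E"))) Some(List(ts))
  else {
    val tss = (for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))).flatten
    // try the candidate reductions lazily; keep the first trace that succeeds
    tss.iterator.map(parse_trace(g, _)).collectFirst { case Some(trace) => ts :: trace }
  }

// for example: parse_trace(grammar, Tok.fromString("2 + 3"))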

def parser(g: Grammar, s: String) = {
  println("\n")
  parse(g, Tok.fromString(s))
}

parser(grammar, "2 + 3 *    4 +       1")
parser(grammar, "(2 + 3) * (4 + 1)")
parser(grammar, "(2 + 3) * 4 (4 + 1)")
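// the first two examples should parse (evaluate to true); the last one should
// eventually come back with false, since "4 (4 + 1)" contains two factors with
// no operator in between; note that parse prints every intermediate token
// list, so the backtracking search produces a lot of output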