parser1.scala
changeset 92 e85600529ca5
parent 91 47f86885d481
child 93 4794759139ea
equal deleted inserted replaced
91:47f86885d481 92:e85600529ca5
     1 // A naive bottom-up parser with backtracking
       
     2 //
       
     3 // Needs:
       
     4 //   :load matcher.scala
       
     5 
       
     6 // some regular expressions
       
     7 val DIGIT = RANGE("0123456789")
       
     8 val NONZERODIGIT = RANGE("123456789")
       
     9 
       
    10 val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
       
    11 val LPAREN = CHAR('(')
       
    12 val RPAREN = CHAR(')')
       
    13 val WHITESPACE = PLUS(RANGE(" \n"))
       
    14 val OPS = RANGE("+-*")
       
    15 
       
    16 // for classifying the strings that have been recognised
       
    17 
       
    18 abstract class Token
       
    19 case object T_WHITESPACE extends Token
       
    20 case object T_NUM extends Token
       
    21 case class T_OP(s: String) extends Token
       
    22 case object T_LPAREN extends Token
       
    23 case object T_RPAREN extends Token
       
    24 case class NT(s: String) extends Token
       
    25 
       
    26 // lexing rules for arithmetic expressions
       
    27 val lexing_rules: List[Rule[Token]]= 
       
    28   List((NUMBER, (s) => T_NUM),
       
    29        (WHITESPACE, (s) => T_WHITESPACE),
       
    30        (LPAREN, (s) => T_LPAREN),
       
    31        (RPAREN, (s) => T_RPAREN),
       
    32        (OPS, (s) => T_OP(s.mkString)))
       
    33 
       
    34 // the tokenizer
       
    35 val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
       
    36 
       
    37 type Grammar = List[(String, List[Token])]
       
    38 
       
    39 // grammar for arithmetic expressions
       
    40 val grammar = 
       
    41   List ("F" -> List(T_NUM),
       
    42         "E" -> List(T_NUM),
       
    43         "E" -> List(NT("E"), T_OP("+"), NT("E")),
       
    44         "E" -> List(NT("E"), T_OP("-"), NT("E")),
       
    45         "E" -> List(NT("E"), T_OP("*"), NT("E")),    
       
    46         "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
       
    47 
       
    48 
       
    49 def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = 
       
    50   ts1 match {
       
    51     case Nil => None
       
    52     case t::ts => 
       
    53       if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
       
    54       else chop(ts, prefix, t::ts2)
       
    55   }
       
    56 
       
    57 // examples for chop 
       
    58 chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)  
       
    59 chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)  
       
    60 
       
    61 def replace[A](ts: List[A], out: List[A], in: List [A]) = 
       
    62   chop(ts, out, Nil) match {
       
    63     case None => None
       
    64     case Some((before, after)) => Some(before ::: in ::: after)
       
    65   }  
       
    66 
       
    67 def parse(g: Grammar, ts: List[Token]) : Boolean = {
       
    68   println(ts)
       
    69   if (ts == List(NT("E"))) true
       
    70   else {
       
    71     val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
       
    72     tss.flatten.exists(parse(g, _))
       
    73   }
       
    74 }
       
    75  
       
    76 def parser(g: Grammar, s: String) = {
       
    77   println("\n")
       
    78   parse(g, Tok.fromString(s))
       
    79 }
       
    80   
       
    81 
       
    82 
       
    83 parser(grammar, "2 + 3 *    4 +       1")
       
    84 parser(grammar, "(2 + 3) * (4 + 1)")
       
    85 parser(grammar, "(2 + 3) * 4 (4 + 1)")
       
    86 
       
    87 
       
    88