parser2.scala
changeset 71 7717f20f0504
parent 64 2d625418c011
// A naive version of parser combinators producing parse trees
//
// Needs
//   :load matcher.scala

// some regular expressions
val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
val ID = PLUS(LETTER)

val DIGIT = RANGE("0123456789")
val NONZERODIGIT = RANGE("123456789")
val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")

val LPAREN = CHAR('(')
val RPAREN = CHAR(')')

val WHITESPACE = PLUS(RANGE(" \n"))
val OPS = RANGE("+-*")
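
// For intuition, NUMBER corresponds to the ordinary regular expression
// [1-9][0-9]*|0: either "0" or a digit string without a leading zero.
// A quick sanity check with the JVM's built-in regexes (an aside, not
// part of matcher.scala):
assert("42".matches("[1-9][0-9]*|0"))
assert("0".matches("[1-9][0-9]*|0"))
assert(!"007".matches("[1-9][0-9]*|0"))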

// for classifying the strings that have been recognised
abstract class Token

case object T_WHITESPACE extends Token
case class T_NUM(s: String) extends Token
case class T_ID(s: String) extends Token
case class T_OP(s: String) extends Token
case object T_LPAREN extends Token
case object T_RPAREN extends Token
case object T_IF extends Token
case object T_THEN extends Token
case object T_ELSE extends Token

// lexing rules for arithmetic expressions
val lexing_rules: List[Rule[Token]] =
  List(("if", (s) => T_IF),
       ("then", (s) => T_THEN),
       ("else", (s) => T_ELSE),
       (NUMBER, (s) => T_NUM(s.mkString)),
       (ID, (s) => T_ID(s.mkString)),
       (WHITESPACE, (s) => T_WHITESPACE),
       (LPAREN, (s) => T_LPAREN),
       (RPAREN, (s) => T_RPAREN),
       (OPS, (s) => T_OP(s.mkString)))

val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
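
// Rule order matters for ties: "if" is listed before ID, so the input "if"
// comes out as T_IF rather than T_ID("if"). A sketch of the expected
// behaviour, assuming the Tokenizer prefers the longest match, breaks ties
// by rule order, and that the List(T_WHITESPACE) argument names the tokens
// it should discard:
//
//   Tok.fromString("if iffy then 42")
//     ==> List(T_IF, T_ID("iffy"), T_THEN, T_NUM("42"))
println(Tok.fromString("if iffy then 42"))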

// parse trees
abstract class ParseTree
case class Leaf(t: Token) extends ParseTree
case class Branch(ts: List[ParseTree]) extends ParseTree
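
// A parse tree for "1 + 2" built by hand, to make the shapes of Leaf and
// Branch concrete (example_tree is an illustrative name, not used below):
val example_tree: ParseTree =
  Branch(List(Leaf(T_NUM("1")), Leaf(T_OP("+")), Leaf(T_NUM("2"))))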

// (The parser combinator infrastructure is unchanged by this changeset and
// elided here: the Parser class with parse and parse_all, the ~ and ||
// combinators, and the atomic parsers ListParser, NumParser and IdParser.)

lazy val E: Parser = (T ~ T_OP("+") ~ E) || T  // start symbol
lazy val T: Parser = (F ~ T_OP("*") ~ T) || F
lazy val F: Parser = (T_LPAREN ~ E ~ T_RPAREN) || NumParser
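
// The grammar encodes the usual precedences: E handles +, T handles *, and F
// handles parentheses and numbers. The recursion is on the right (E appears
// after T_OP("+")); a left-recursive rule such as (E ~ T_OP("+") ~ T) would
// send these naive combinators into an infinite loop. One consequence,
// assuming NumParser wraps number tokens as Leaf(T_NUM(...)): "1 + 2 * 3"
// parses as 1 + (2 * 3), i.e. as the tree
//
//   Branch(List(Leaf(T_NUM("1")), Leaf(T_OP("+")),
//               Branch(List(Leaf(T_NUM("2")), Leaf(T_OP("*")), Leaf(T_NUM("3"))))))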

println(Tok.fromString("1 + 2 + 3"))
println(E.parse_all(Tok.fromString("1 + 2 + 3")))

def eval(t: ParseTree) : Int = t match {
  case Leaf(T_NUM(n)) => n.toInt
  case Branch(List(t1, Leaf(T_OP("+")), t2)) => eval(t1) + eval(t2)
  case Branch(List(t1, Leaf(T_OP("*")), t2)) => eval(t1) * eval(t2)
  case Branch(List(Leaf(T_LPAREN), t, Leaf(T_RPAREN))) => eval(t)
}
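
// Worked example: on the tree for "1 + 2 * 3" sketched above, eval computes
// 1 + (2 * 3) = 7, and on "1 + 2 + 3" it computes 1 + (2 + 3) = 6. With +
// and * the right-nesting of the grammar does not affect the result; for a
// non-associative operator like - it would, and eval has no case for - yet,
// even though OPS recognises it.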

(E.parse_all(Tok.fromString("1 + 2 + 3"))).map(eval(_))
(E.parse_all(Tok.fromString("1 + 2 * 3"))).map(eval(_))

lazy val EXPR: Parser =
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR)) ||
  new ListParser(List(T_IF, EXPR, T_THEN, EXPR, T_ELSE, EXPR)) ||
  IdParser
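
// This grammar is ambiguous in the classic dangling-else way: in
// "if a then if x then y else c" the else can belong to either if. Since
// parse_all returns all successful parses, the second println below should
// print both readings rather than committing to one.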

println(EXPR.parse_all(Tok.fromString("if a then b else c")))
println(EXPR.parse_all(Tok.fromString("if a then if x then y else c")))