parser1.scala
changeset 71:7717f20f0504
parent 61:a80f0cf17f91

--- parser1.scala  (61:a80f0cf17f91)
+++ parser1.scala  (71:7717f20f0504)
-:load matcher.scala
+// A naive bottom-up parser with backtracking
+//
+// Needs:
+//   :load matcher.scala
 
 // some regular expressions
-val DIGIT = RANGE("0123456789".toList)
-val NONZERODIGIT = RANGE("123456789".toList)
+val DIGIT = RANGE("0123456789")
+val NONZERODIGIT = RANGE("123456789")
 
 val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0")
 val LPAREN = CHAR('(')
 val RPAREN = CHAR(')')
-val WHITESPACE = PLUS(RANGE(" \n".toList))
-val OPS = RANGE("+-*".toList)
+val WHITESPACE = PLUS(RANGE(" \n"))
+val OPS = RANGE("+-*")
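
The dropped .toList calls suggest that RANGE in matcher.scala now accepts a
String directly. A minimal sketch of how such an overload could look (names
and shapes assumed here, not matcher.scala's actual code):

    abstract class Rexp
    case class RANGE(cs: List[Char]) extends Rexp
    object RANGE {
      // hypothetical String variant delegating to the List[Char] form
      def apply(s: String) : RANGE = RANGE(s.toList)
    }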
 
 // for classifying the strings that have been recognised
+
 abstract class Token
 case object T_WHITESPACE extends Token
 case object T_NUM extends Token
 case class T_OP(s: String) extends Token
 case object T_LPAREN extends Token
 case object T_RPAREN extends Token
-case class T_NT(s: String, rhs: List[Token]) extends Token
+case class NT(s: String) extends Token
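
The old T_NT carried the right-hand side it had replaced, which served as a
crude parse tree; the new parse function below only recognises its input and
builds no trees, so NT just records the nonterminal's name.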
-
-def tokenizer(rs: List[Rule[Token]], s: String) : List[Token] =
-  tokenize(rs, s.toList).filterNot(_ match {
-    case T_WHITESPACE => true
-    case _ => false
-  })
-
-
 
 // lexing rules for arithmetic expressions
 val lexing_rules: List[Rule[Token]]= 
   List((NUMBER, (s) => T_NUM),
        (WHITESPACE, (s) => T_WHITESPACE),
        (LPAREN, (s) => T_LPAREN),
        (RPAREN, (s) => T_RPAREN),
        (OPS, (s) => T_OP(s.mkString)))
 
+// the tokenizer
+val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE))
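
The hand-written tokenizer function is replaced by a Tokenizer object from
matcher.scala. Judging by the deleted filterNot code, the second argument
presumably names the token classes to discard from the output:

    Tok.fromString("2 + 3")
      // presumably List(T_NUM, T_OP(+), T_NUM), with T_WHITESPACE dropped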
 
 type Grammar = List[(String, List[Token])]
 
 // grammar for arithmetic expressions
 val grammar = 
-  List ("E" -> List(T_NUM),
-        "E" -> List(T_NT("E", Nil), T_OP("+"), T_NT("E", Nil)),
-        "E" -> List(T_NT("E", Nil), T_OP("-"), T_NT("E", Nil)),
-        "E" -> List(T_NT("E", Nil), T_OP("*"), T_NT("E", Nil)),
-        "E" -> List(T_LPAREN, T_NT("E", Nil), T_RPAREN))
+  List ("F" -> List(T_NUM),
+        "E" -> List(T_NUM),
+        "E" -> List(NT("E"), T_OP("+"), NT("E")),
+        "E" -> List(NT("E"), T_OP("-"), NT("E")),
+        "E" -> List(NT("E"), T_OP("*"), NT("E")),
+        "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
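
The grammar rules act as rewrite rules for bottom-up reduction: parse below
repeatedly replaces an occurrence of some right-hand side by its nonterminal
until only NT(E) is left. For the input 2 + 3 one successful reduction
sequence is:

    // List(T_NUM, T_OP(+), T_NUM)     the token list
    // List(NT(E), T_OP(+), T_NUM)     by "E" -> List(T_NUM)
    // List(NT(E), T_OP(+), NT(E))     by "E" -> List(T_NUM)
    // List(NT(E))                     by "E" -> List(NT("E"), T_OP("+"), NT("E"))

Note the new "F" rule: no right-hand side mentions NT("F"), so reducing a
T_NUM to NT("F") is always a dead end that the backtracking search has to
explore and abandon.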
 
-def startsWith[A](ts1: List[A], ts2: List[A]) : Boolean = (ts1, ts2) match {
-  case (_, Nil) => true
-  case (T_NT(e, _)::ts1,T_NT(f, _)::ts2) => (e == f) && startsWith(ts1, ts2)
-  case (t1::ts1, t2::ts2) => (t1 == t2) && startsWith(ts1, ts2)
-  case _ => false
-}
 
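
The bespoke startsWith was only needed because T_NT values carried a rhs
component that had to be ignored when comparing tokens; with the payload-free
NT, plain case-class equality is enough and the library method
List.startsWith can take over below.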
 def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = 
   ts1 match {
     case Nil => None
     case t::ts => 
-      if (startsWith(ts1, prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
+      if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length))
       else chop(ts, prefix, t::ts2)
   }
 
-// examples
+// examples for chop
 chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
 chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
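
chop splits a list around the first occurrence of the given prefix, returning
the elements before and after it:

    chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil)
      // => Some((List(1, 2, 3), List(6, 7, 8, 9)))
    chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil)
      // => None, since 3,5 occurs nowhere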
 
 def replace[A](ts: List[A], out: List[A], in: List [A]) = 
   chop(ts, out, Nil) match {
     case None => None
     case Some((before, after)) => Some(before ::: in ::: after)
   }
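
replace substitutes the first occurrence of out by in, for example:

    replace(List(1,2,3,4,5), List(3,4), List(9))
      // => Some(List(1, 2, 9, 5))
    replace(List(1,2,3,4,5), List(3,5), List(9))
      // => None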
 
-def parse1(g: Grammar, ts: List[Token]) : Boolean = ts match {
-  case List(T_NT("E", tree)) => { println(tree); true }
-  case _ => {
-    val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(T_NT(lhs, rhs)))
-    tss.flatten.exists(parse1(g, _))
-  }
-}
+def parse(g: Grammar, ts: List[Token]) : Boolean = {
+  println(ts)
+  if (ts == List(NT("E"))) true
+  else {
+    val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs)))
+    tss.flatten.exists(parse(g, _))
+  }
+}
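
At each step parse tries every grammar rule, each applied at the leftmost
occurrence of its right-hand side, and exists backtracks depth-first through
the alternatives until some chain of reductions reaches List(NT("E")). It can
also be called on a token list directly:

    parse(grammar, List(T_NUM, T_OP("+"), T_NUM))
      // prints each sentential form it visits, => true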
 
+def parser(g: Grammar, s: String) = {
+  println("\n")
+  parse(g, Tok.fromString(s))
+}
+
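
parser glues the two phases together: Tok.fromString lexes the string and
parse then searches for a reduction of the resulting tokens to NT("E"), so
grammars can now be tried out on plain strings.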
 
-println() ; parse1(grammar, tokenizer(lexing_rules, "2 + 3 * 4 + 1"))
-println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * (4 + 1)"))
-println() ; parse1(grammar, tokenizer(lexing_rules, "(2 + 3) * 4 (4 + 1)"))
+
+parser(grammar, "2 + 3 *    4 +       1")
+parser(grammar, "(2 + 3) * (4 + 1)")
+parser(grammar, "(2 + 3) * 4 (4 + 1)")
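
The first two inputs are derivable in the grammar (the stretched-out
whitespace in the first one exercises the lexer), so both runs should end in
true; in the third, the juxtaposition 4 (4 + 1) matches no rule, so the
search should exhaust all possible reductions and return false.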