| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Sat, 04 Nov 2023 18:28:09 +0000 | |
| changeset 952 | f09d9db4e922 | 
| parent 742 | 155426396b5f | 
| permissions | -rw-r--r-- | 
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 1 | // A naive bottom-up parser with backtracking | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 2 | // | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 3 | // Needs: | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 4 | // :load matcher.scala | 
| 50 | 5 | |
| 6 | // some regular expressions | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 7 | val DIGIT = RANGE("0123456789")
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 8 | val NONZERODIGIT = RANGE("123456789")
 | 
| 50 | 9 | |
| 10 | val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") | |
| 11 | val LPAREN = CHAR('(')
 | |
| 12 | val RPAREN = CHAR(')')
 | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 13 | val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 14 | val OPS = RANGE("+-*")
 | 
| 50 | 15 | |
| 16 | // for classifying the strings that have been recognised | |
| 61 | 17 | |
| 50 | 18 | abstract class Token | 
| 19 | case object T_WHITESPACE extends Token | |
| 20 | case object T_NUM extends Token | |
| 21 | case class T_OP(s: String) extends Token | |
| 22 | case object T_LPAREN extends Token | |
| 23 | case object T_RPAREN extends Token | |
| 51 | 24 | case class NT(s: String) extends Token | 
| 50 | 25 | |
| 26 | // lexing rules for arithmetic expressions | |
| 61 | 27 | val lexing_rules: List[Rule[Token]]= | 
| 50 | 28 | List((NUMBER, (s) => T_NUM), | 
| 29 | (WHITESPACE, (s) => T_WHITESPACE), | |
| 30 | (LPAREN, (s) => T_LPAREN), | |
| 31 | (RPAREN, (s) => T_RPAREN), | |
| 32 | (OPS, (s) => T_OP(s.mkString))) | |
| 33 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 34 | // the tokenizer | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 35 | val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) | 
| 50 | 36 | |
| 37 | type Grammar = List[(String, List[Token])] | |
| 38 | ||
| 39 | // grammar for arithmetic expressions | |
| 40 | val grammar = | |
| 54 | 41 |   List ("F" -> List(T_NUM),
 | 
| 42 | "E" -> List(T_NUM), | |
| 51 | 43 |         "E" -> List(NT("E"), T_OP("+"), NT("E")),
 | 
| 44 |         "E" -> List(NT("E"), T_OP("-"), NT("E")),
 | |
| 45 |         "E" -> List(NT("E"), T_OP("*"), NT("E")),    
 | |
| 46 |         "E" -> List(T_LPAREN, NT("E"), T_RPAREN))
 | |
| 50 | 47 | |
| 48 | ||
| 49 | def chop[A](ts1: List[A], prefix: List[A], ts2: List[A]) : Option[(List[A], List[A])] = | |
| 50 |   ts1 match {
 | |
| 51 | case Nil => None | |
| 52 | case t::ts => | |
| 53 | if (ts1.startsWith(prefix)) Some(ts2.reverse, ts1.drop(prefix.length)) | |
| 54 | else chop(ts, prefix, t::ts2) | |
| 55 | } | |
| 56 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 57 | // examples for chop | 
| 50 | 58 | chop(List(1,2,3,4,5,6,7,8,9), List(4,5), Nil) | 
| 59 | chop(List(1,2,3,4,5,6,7,8,9), List(3,5), Nil) | |
| 60 | ||
| 61 | def replace[A](ts: List[A], out: List[A], in: List [A]) = | |
| 62 |   chop(ts, out, Nil) match {
 | |
| 63 | case None => None | |
| 64 | case Some((before, after)) => Some(before ::: in ::: after) | |
| 65 | } | |
| 66 | ||
| 51 | 67 | def parse(g: Grammar, ts: List[Token]) : Boolean = {
 | 
| 54 | 68 | println(ts) | 
| 51 | 69 |   if (ts == List(NT("E"))) true
 | 
| 50 | 70 |   else {
 | 
| 51 | 71 | val tss = for ((lhs, rhs) <- g) yield replace(ts, rhs, List(NT(lhs))) | 
| 72 | tss.flatten.exists(parse(g, _)) | |
| 50 | 73 | } | 
| 74 | } | |
| 75 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 76 | def parser(g: Grammar, s: String) = {
 | 
| 51 | 77 |   println("\n")
 | 
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 78 | parse(g, Tok.fromString(s)) | 
| 51 | 79 | } | 
| 80 | ||
| 50 | 81 | |
| 51 | 82 | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 83 | parser(grammar, "2 + 3 * 4 + 1") | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 84 | parser(grammar, "(2 + 3) * (4 + 1)") | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
61diff
changeset | 85 | parser(grammar, "(2 + 3) * 4 (4 + 1)") | 
| 50 | 86 | |
| 87 | ||
| 88 |