| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Mon, 20 Oct 2025 22:18:21 +0200 | |
| changeset 1013 | 1a23d87d1700 | 
| parent 754 | 1c9a23304b85 | 
| permissions | -rw-r--r-- | 
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 1 | // Parser combinators including semantic actions | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 2 | // parses lists of tokens | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 3 | // | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 4 | // Needs | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 5 | // :load matcher.scala | 
| 62 | 6 | |
| 7 | // some regular expressions | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 8 | val LETTER = RANGE("abcdefghijklmnopqrstuvwxyz")
 | 
| 62 | 9 | val ID = PLUS(LETTER) | 
| 10 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 11 | val DIGIT = RANGE("0123456789")
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 12 | val NONZERODIGIT = RANGE("123456789")
 | 
| 62 | 13 | val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") | 
| 14 | ||
| 15 | val LPAREN = CHAR('(')
 | |
| 16 | val RPAREN = CHAR(')')
 | |
| 17 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 18 | val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 19 | val OPS = RANGE("+-*")
 | 
| 62 | 20 | |
| 21 | // for classifying the strings that have been recognised | |
| 22 | abstract class Token | |
| 23 | ||
| 24 | case object T_WHITESPACE extends Token | |
| 25 | case class T_NUM(s: String) extends Token | |
| 26 | case class T_ID(s: String) extends Token | |
| 27 | case class T_OP(s: String) extends Token | |
| 28 | case object T_LPAREN extends Token | |
| 29 | case object T_RPAREN extends Token | |
| 30 | case object T_IF extends Token | |
| 31 | case object T_THEN extends Token | |
| 32 | case object T_ELSE extends Token | |
| 33 | ||
| 34 | // lexing rules for arithmetic expressions | |
| 35 | val lexing_rules: List[Rule[Token]]= | |
| 36 |   List(("if", (s) => T_IF),
 | |
| 37 |        ("then", (s) => T_THEN),
 | |
| 38 |        ("else", (s) => T_ELSE),
 | |
| 39 | (NUMBER, (s) => T_NUM(s.mkString)), | |
| 40 | (ID, (s) => T_ID(s.mkString)), | |
| 41 | (WHITESPACE, (s) => T_WHITESPACE), | |
| 42 | (LPAREN, (s) => T_LPAREN), | |
| 43 | (RPAREN, (s) => T_RPAREN), | |
| 44 | (OPS, (s) => T_OP(s.mkString))) | |
| 45 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 46 | val Tok = Tokenizer(lexing_rules, List(T_WHITESPACE)) | 
| 62 | 47 | |
| 48 | // parser combinators with return type T | |
| 49 | abstract class Parser[T] {
 | |
| 50 | def parse(ts: List[Token]): Set[(T, List[Token])] | |
| 51 | ||
| 52 | def parse_all(ts: List[Token]) : Set[T] = | |
| 53 | for ((head, tail) <- parse(ts); if (tail == Nil)) yield head | |
| 54 | ||
| 55 | def || (right : => Parser[T]) : Parser[T] = new AltParser(this, right) | |
| 56 | def ==>[S] (f: => T => S) : Parser [S] = new FunParser(this, f) | |
| 57 | def ~[S] (right : => Parser[S]) : Parser[(T, S)] = new SeqParser(this, right) | |
| 58 | def ~>[S] (right : => Parser[S]) : Parser[S] = this ~ right ==> (x => x._2) | |
| 59 | def <~[S] (right : => Parser[S]) : Parser[T] = this ~ right ==> (x => x._1) | |
| 60 | ||
| 61 | } | |
| 62 | ||
| 63 | class SeqParser[T, S](p: => Parser[T], q: => Parser[S]) extends Parser[(T, S)] {
 | |
| 64 | def parse(sb: List[Token]) = | |
| 65 | for ((head1, tail1) <- p.parse(sb); | |
| 66 | (head2, tail2) <- q.parse(tail1)) yield ((head1, head2), tail2) | |
| 67 | } | |
| 68 | ||
| 69 | class AltParser[T](p: => Parser[T], q: => Parser[T]) extends Parser[T] {
 | |
| 70 | def parse (sb: List[Token]) = p.parse(sb) ++ q.parse(sb) | |
| 71 | } | |
| 72 | ||
| 73 | class FunParser[T, S](p: => Parser[T], f: T => S) extends Parser[S] {
 | |
| 74 | def parse (sb: List[Token]) = | |
| 75 | for ((head, tail) <- p.parse(sb)) yield (f(head), tail) | |
| 76 | } | |
| 77 | ||
| 78 | ||
| 79 | case class TokParser(tok: Token) extends Parser[Token] {
 | |
| 80 |   def parse(ts: List[Token]) = ts match {
 | |
| 81 | case t::ts if (t == tok) => Set((t, ts)) | |
| 82 | case _ => Set () | |
| 83 | } | |
| 84 | } | |
| 85 | ||
| 86 | implicit def token2tparser(t: Token) = TokParser(t) | |
| 87 | ||
| 88 | case object NumParser extends Parser[Int] {
 | |
| 89 |   def parse(ts: List[Token]) = ts match {
 | |
| 90 | case T_NUM(s)::ts => Set((s.toInt, ts)) | |
| 91 | case _ => Set () | |
| 92 | } | |
| 93 | } | |
| 94 | ||
| 64 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
62diff
changeset | 95 | lazy val E: Parser[Int] = (T ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z } || T  
 | 
| 62 | 96 | lazy val T: Parser[Int] = (F ~ T_OP("*") ~ T) ==> { case ((x, y), z) => x * z } || F
 | 
| 97 | lazy val F: Parser[Int] = (T_LPAREN ~> E <~ T_RPAREN) || NumParser | |
| 98 | ||
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 99 | println(E.parse_all(Tok.fromString("1 + 2 + 3")))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 100 | println(E.parse_all(Tok.fromString("1 + 2 * 3")))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 101 | println(E.parse_all(Tok.fromString("(1 + 2) * 3")))
 | 
| 62 | 102 | |
| 71 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 103 | // Excercise: implement minus | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 104 | println(E.parse_all(Tok.fromString("(1 - 2) * 3")))
 | 
| 
7717f20f0504
updated
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 
64diff
changeset | 105 | println(E.parse_all(Tok.fromString("(1 + 2) * - 3")))
 |