| author | Christian Urban <christian.urban@kcl.ac.uk> | 
| Mon, 20 Oct 2025 22:18:21 +0200 | |
| changeset 1013 | 1a23d87d1700 | 
| parent 754 | 1c9a23304b85 | 
| permissions | -rw-r--r-- | 
| 64 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 1 | val DIGIT = RANGE("0123456789")
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 2 | val NONZERODIGIT = RANGE("123456789")
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 3 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 4 | val NUMBER = ALT(SEQ(NONZERODIGIT, STAR(DIGIT)), "0") | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 5 | val LPAREN = CHAR('(')
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 6 | val RPAREN = CHAR(')')
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 7 | val WHITESPACE = PLUS(RANGE(" \n"))
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 8 | val OPS = RANGE("+-*")
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 9 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 10 | // for classifying the strings that have been recognised | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 11 | abstract class Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 12 | case object T_WHITESPACE extends Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 13 | case class T_NUM(s: String) extends Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 14 | case class T_OP(s: String) extends Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 15 | case object T_LPAREN extends Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 16 | case object T_RPAREN extends Token | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 17 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 18 | val lexing_rules: List[Rule[Token]]= | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 19 | List((NUMBER, (s) => T_NUM(s.mkString)), | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 20 | (WHITESPACE, (s) => T_WHITESPACE), | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 21 | (LPAREN, (s) => T_LPAREN), | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 22 | (RPAREN, (s) => T_RPAREN), | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 23 | (OPS, (s) => T_OP(s.mkString))) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 24 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 25 | val Tk = Tokenizer(lexing_rules, List(T_WHITESPACE)) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 26 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 27 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 28 | // parser combinators with input type I and return type T | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 29 | // and memoisation | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 30 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 31 | case class SubList[T](s: List[T], l: Int, h: Int) {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 32 | def low = l | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 33 | def high = h | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 34 | def length = h - l | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 35 | def sublist(l: Int = l, h: Int = h) = s.slice(l, h) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 36 | def set(low: Int = l, high: Int = h) = SubList(s, low, high) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 37 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 38 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 39 | type Ctxt[T] = List[(String, SubList[T])] | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 40 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 41 | abstract class Parser[I, T] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 42 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 43 | def parse(ts: SubList[I], ctxt: Ctxt[I]): Set[(T, SubList[I])] | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 44 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 45 | def parse_all(s: List[I]) : Set[T] = | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 46 | for ((head, tail) <- parse(SubList(s, 0, s.length), Nil); if (tail.sublist() == Nil)) yield head | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 47 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 48 | def || (right : => Parser[I, T]) : Parser[I, T] = new AltParser(this, right) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 49 | def ==>[S] (f: => T => S) : Parser [I, S] = new FunParser(this, f) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 50 | def ~[S] (right : => Parser[I, S]) : Parser[I, (T, S)] = new SeqParser(this, right) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 51 | def ~>[S] (right : => Parser[I, S]) : Parser[I, S] = this ~ right ==> (_._2) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 52 | def <~[S] (right : => Parser[I, S]) : Parser[I, T] = this ~ right ==> (_._1) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 53 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 54 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 55 | class SeqParser[I, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 56 | def parse(sb: SubList[I], ctxt: Ctxt[I]) = | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 57 | for ((head1, tail1) <- p.parse(sb, ctxt); | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 58 | (head2, tail2) <- q.parse(tail1, ctxt)) yield ((head1, head2), tail2) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 59 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 60 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 61 | class AltParser[I, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 62 | def parse(sb: SubList[I], ctxt: Ctxt[I]) = p.parse(sb, ctxt) ++ q.parse(sb, ctxt) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 63 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 64 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 65 | class FunParser[I, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 66 | def parse(sb: SubList[I], ctxt: Ctxt[I]) = | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 67 | for ((head, tail) <- p.parse(sb, ctxt)) yield (f(head), tail) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 68 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 69 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 70 | case object NumParser extends Parser[Token, Int] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 71 |   def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 72 |     if (0 < sb.length) sb.sublist(sb.low, sb.low + 1) match { 
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 73 | case T_NUM(i)::Nil => Set((i.toInt, sb.set(low = sb.low + 1))) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 74 | case _ => Set() | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 75 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 76 | else Set() | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 77 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 78 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 79 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 80 | case class TokParser(t: Token) extends Parser[Token, Token] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 81 |   def parse(sb: SubList[Token], ctxt: Ctxt[Token]) = {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 82 | if (0 < sb.length && sb.sublist(sb.low, sb.low + 1) == List(t)) Set((t, sb.set(low = sb.low + 1))) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 83 | else Set() | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 84 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 85 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 86 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 87 | implicit def token2tparser(t: Token) = TokParser(t) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 88 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 89 | class IgnLst[I, T](p: => Parser[I, T]) extends Parser[I, T] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 90 |   def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 91 | if (sb.length == 0) Set() | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 92 | else for ((head, tail) <- p.parse(sb.set(high = sb.high - 1), ctxt)) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 93 | yield (head, tail.set(high = tail.high + 1)) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 94 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 95 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 96 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 97 | class CHECK[I, T](nt: String, p: => Parser[I, T]) extends Parser[I, T] {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 98 |   def parse(sb: SubList[I], ctxt: Ctxt[I]) = {
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 99 | val should_trim = ctxt.contains (nt, sb) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 100 | if (should_trim && sb.length == 0) Set() | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 101 | else if (should_trim) new IgnLst(p).parse(sb, (nt, sb)::ctxt) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 102 | else p.parse(sb, (nt, sb)::ctxt) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 103 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 104 | } | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 105 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 106 | lazy val E: Parser[Token, Int] = | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 107 |   new CHECK("E", (E ~ T_OP("+") ~ E) ==> { case ((x, y), z) => x + z} || 
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 108 |                  (E ~ T_OP("*") ~ E) ==> { case ((x, y), z) => x * z} ||
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 109 |                  (T_LPAREN ~ E ~ T_RPAREN) ==> { case ((x, y), z) => y} ||
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 110 | NumParser) | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 111 | |
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 112 | println(E.parse_all(Tk.fromString("1 + 2 * 3")))
 | 
| 
2d625418c011
added everything
 Christian Urban <christian dot urban at kcl dot ac dot uk> parents: diff
changeset | 113 | println(E.parse_all(Tk.fromString("(1 + 2) * 3")))
 |