diff -r 53e7da9f372a -r 53f08d873e09 solutions/cw3/parser.sc --- a/solutions/cw3/parser.sc Fri Sep 15 10:49:33 2023 +0100 +++ b/solutions/cw3/parser.sc Sun Sep 17 19:12:57 2023 +0100 @@ -4,32 +4,67 @@ import lexer._ -case class ~[+A, +B](_1: A, _2: B) -type IsSeq[A] = A => Seq[_] +case class ~[+A, +B](x: A, y: B) + +// parser combinators + +abstract class Parser[I, T](using is: I => Seq[_]) { + def parse(in: I): Set[(T, I)] + + def parse_all(in: I) : Set[T] = + for ((hd, tl) <- parse(in); + if is(tl).isEmpty) yield hd +} -abstract class Parser[I : IsSeq, T] { - def parse(ts: I): Set[(T, I)] +// alternative parser +class AltParser[I, T](p: => Parser[I, T], + q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] { + def parse(in: I) = p.parse(in) ++ q.parse(in) +} - def parse_all(ts: I) : Set[T] = - for ((head, tail) <- parse(ts); if tail.isEmpty) yield head +// sequence parser +class SeqParser[I, T, S](p: => Parser[I, T], + q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] { + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); + (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) } -class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] { - def parse(sb: I) = - for ((head1, tail1) <- p.parse(sb); - (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2) +// map parser +class MapParser[I, T, S](p: => Parser[I, T], + f: T => S)(using I => Seq[_]) extends Parser[I, S] { + def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } -class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(sb: I) = p.parse(sb) ++ q.parse(sb) + +/* +// atomic parser for (particular) strings +case class StrParser(s: String) extends Parser[String, String] { + def parse(sb: String) = { + val (prefix, suffix) = sb.splitAt(s.length) + if (prefix == s) Set((prefix, suffix)) else Set() + } } -class FunParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { - def parse(sb: I) = - for ((head, tail) <- p.parse(sb)) yield (f(head), tail) +extension (sc: StringContext) + def p(args: Any*) = StrParser(sc.s(args:_*)) +*/ + +// more convenient syntax for parser combinators +extension [I, T](p: Parser[I, T])(using I => Seq[_]) { + def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) + def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) + def map[S](f: => T => S) = new MapParser[I, T, S](p, f) } // New parser that takes as input a list of tokens +case class TokenParser(t: Token) extends Parser[List[Token], Token] { + def parse(in: List[Token]) = { + // an example of an atomic parser for characters + if (!in.isEmpty && in.head == t) Set((t, in.tail)) else Set() + } +} + case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] { def parse(tsb: List[Token]) = { val (prefix, suffix) = tsb.splitAt(ts.length) @@ -39,34 +74,16 @@ // Implicit definitions to go from a token // or a list of tokens to a TokenListParser -implicit def token2parser(t: Token) = TokenListParser(List(t)) -implicit def tokenList2parser(ts: List[Token]) = TokenListParser(ts) +implicit def token2parser(t: Token) : Parser[List[Token], Token] = + TokenParser(t) -implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { - def || (q : => Parser[I, T]) = new AltParser[I, T](p, q) - def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f) - def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) +extension (t: Token) { + def || (q : => Parser[List[Token], Token]) = + new AltParser[List[Token], Token](t, q) + def ~[S](q : => Parser[List[Token], S]) = + new SeqParser[List[Token], Token, S](t, q) } -implicit def TokenOps(t: Token) = new { - def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](List(t), q) - def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](List(t), qs) - def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](List(t), f) - def ~[S](q : => Parser[List[Token], S]) = - new SeqParser[List[Token], List[Token], S](List(t), q) - def ~ (qs : List[Token]) = - new SeqParser[List[Token], List[Token], List[Token]](List(t), qs) -} - -implicit def TokenListOps(ts: List[Token]) = new { - def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](ts, q) - def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](ts, qs) - def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](ts, f) - def ~[S](q : => Parser[List[Token], S]) = - new SeqParser[List[Token], List[Token], S](ts, q) - def ~ (qs : List[Token]) = - new SeqParser[List[Token], List[Token], List[Token]](ts, qs) -} // Abstract Syntax Trees abstract class Stmt @@ -114,48 +131,49 @@ } } + // WHILE Language Parsing lazy val AExp: Parser[List[Token], AExp] = - (Te ~ T_OP("+") ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z): AExp } || - (Te ~ T_OP("-") ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z): AExp } || Te + (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } || + (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te lazy val Te: Parser[List[Token], AExp] = - (Fa ~ T_OP("*") ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z): AExp } || - (Fa ~ T_OP("/") ~ Te) ==> { case x ~ _ ~ z => Aop("/", x, z): AExp } || - (Fa ~ T_OP("%") ~ Te) ==> { case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa + (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } || + (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } || + (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa lazy val Fa: Parser[List[Token], AExp] = - (T_PAREN("(") ~ AExp ~ T_PAREN(")")) ==> { case _ ~ y ~ _ => y } || - IdParser() ==> Var || - NumParser() ==> Num + (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } || + IdParser().map{Var(_)} || + NumParser().map{Num(_)} lazy val BExp: Parser[List[Token], BExp] = - (AExp ~ T_OP("==") ~ AExp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || - (AExp ~ T_OP("!=") ~ AExp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || - (AExp ~ T_OP("<") ~ AExp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || - (AExp ~ T_OP(">") ~ AExp) ==> { case x ~ _ ~ z => Bop(">", x, z): BExp } || - (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp) ==> { case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } || - (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp) ==> { case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } || - (T_KEYWORD("true") ==> (_ => True: BExp )) || - (T_KEYWORD("false") ==> (_ => False: BExp )) || - (T_PAREN("(") ~ BExp ~ T_PAREN(")")) ==> { case _ ~ x ~ _ => x } + (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || + (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || + (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || + (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } || + (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } || + (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } || + (T_KEYWORD("true").map(_ => True: BExp )) || + (T_KEYWORD("false").map(_ => False: BExp )) || + (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x } lazy val Stmt: Parser[List[Token], Stmt] = - T_KEYWORD("skip") ==> (_ => Skip: Stmt) || - (IdParser() ~ T_OP(":=") ~ AExp) ==> { case id ~ _ ~ z => Assign(id, z): Stmt } || - (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block) ==> { case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } || - (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) : Stmt } || - (T_KEYWORD("read") ~ IdParser()) ==> { case _ ~ id => Read(id): Stmt} || - (T_KEYWORD("write") ~ IdParser()) ==> { case _ ~ id => WriteId(id): Stmt} || - (T_KEYWORD("write") ~ StringParser()) ==> { case _ ~ s => WriteString(s): Stmt} || - (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")) ==> { case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} || - (T_KEYWORD("write") ~ T_PAREN("(") ~ StringParser() ~ T_PAREN(")")) ==> { case _ ~ _ ~ s ~ _ => WriteString(s): Stmt} + T_KEYWORD("skip").map(_ => Skip: Stmt) || + (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } || + (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } || + (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } || + (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} || + (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} || + (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} || + (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} || + (T_KEYWORD("write") ~ T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt} lazy val Stmts: Parser[List[Token], Block] = - (Stmt ~ T_SEMI ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } || - (Stmt ==> (s => List(s) : Block)) + (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } || + (Stmt.map(s => List(s) : Block)) lazy val Block: Parser[List[Token], Block] = - (T_PAREN("{") ~ Stmts ~ T_PAREN("}")) ==> { case x ~ y ~ z => y} || - (Stmt ==> (s => List(s))) + (T_PAREN("{") ~ Stmts ~ T_PAREN("}")).map{ case x ~ y ~ z => y} || + (Stmt.map(s => List(s))) // Testing with programs 2 & 3