solutions/cw4/parser.sc
changeset 920 7af2eea19646
parent 894 02ef5c3abc51
child 959 64ec1884d860
--- a/solutions/cw4/parser.sc	Sun Sep 17 19:12:57 2023 +0100
+++ b/solutions/cw4/parser.sc	Tue Sep 19 09:54:41 2023 +0100
@@ -3,33 +3,65 @@
 import $file.lexer
 import lexer._ 
 
+case class ~[+A, +B](x: A, y: B)
 
-case class ~[+A, +B](_1: A, _2: B)
-type IsSeq[A] = A => Seq[_]
+// parser combinators
+
+abstract class Parser[I, T](using is: I => Seq[_])  {
+  def parse(in: I): Set[(T, I)]  
+
+  def parse_all(in: I) : Set[T] =
+    for ((hd, tl) <- parse(in); 
+        if is(tl).isEmpty) yield hd
+}
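+
+// For example, for a parser p : Parser[List[Token], T] and tokens ts,
+//   p.parse(ts)     : Set[(T, List[Token])]   (all partial parses)
+//   p.parse_all(ts) : Set[T]                  (only complete parses)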
 
-abstract class Parser[I : IsSeq, T] {
-  def parse(ts: I): Set[(T, I)]
+// alternative parser
+class AltParser[I, T](p: => Parser[I, T], 
+                      q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
+  def parse(in: I) = p.parse(in) ++ q.parse(in)   
+}
 
-  def parse_all(ts: I) : Set[T] =
-    for ((head, tail) <- parse(ts); if tail.isEmpty) yield head
+// sequence parser
+class SeqParser[I, T, S](p: => Parser[I, T], 
+                         q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
+  def parse(in: I) = 
+    for ((hd1, tl1) <- p.parse(in); 
+         (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
 }
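+
+// p ~ q wraps paired results in the ~ case class above, so nested
+// sequences can be taken apart with patterns such as
+//   case x ~ _ ~ z => ...
+// as used in the grammar rules below.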
 
-class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
-  def parse(sb: I) = 
-    for ((head1, tail1) <- p.parse(sb); 
-         (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
+// map parser
+class MapParser[I, T, S](p: => Parser[I, T], 
+                         f: T => S)(using I => Seq[_]) extends Parser[I, S] {
+  def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
+}
+
+// more convenient syntax for parser combinators
+extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
+  def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
+  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+  def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
 }
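+
+// With this syntax the grammar rules below read close to BNF: as a
+// sketch, (p ~ q).map{ case x ~ y => (x, y) } || r  runs p then q,
+// pairs their results, and unions these parses with those of r.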
 
-class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
-  def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
+/*
+// atomic parser for (particular) strings
+case class StrParser(s: String) extends Parser[String, String] {
+  def parse(sb: String) = {
+    val (prefix, suffix) = sb.splitAt(s.length)
+    if (prefix == s) Set((prefix, suffix)) else Set()
+  }
 }
 
-class FunParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
-  def parse(sb: I) = 
-    for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
-}
+extension (sc: StringContext) 
+  def p(args: Any*) = StrParser(sc.s(args:_*))
+*/
 
-// New parser that takes as input a list of tokens
+case class TokenParser(t: Token) extends Parser[List[Token], Token] {
+    def parse(in: List[Token]) = {
+      // an atomic parser for a single token
+      if (in.nonEmpty && in.head == t) Set((t, in.tail)) else Set()
+    }
+}   
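+
+// For example (illustrative):
+//   TokenParser(T_KEYWORD("skip")).parse(List(T_KEYWORD("skip"), T_SEMI))
+// yields Set((T_KEYWORD("skip"), List(T_SEMI)))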
+
 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
     def parse(tsb: List[Token]) = {
         val (prefix, suffix) = tsb.splitAt(ts.length)
@@ -39,34 +71,17 @@
 
 // Implicit definitions to go from a token 
 // or a list of tokens to a TokenListParser
-implicit def token2parser(t: Token) = TokenListParser(List(t))
-implicit def tokenList2parser(ts: List[Token]) = TokenListParser(ts)
+implicit def token2parser(t: Token) : Parser[List[Token], Token] = 
+  TokenParser(t)
 
-implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
-  def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
-  def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
-  def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
+extension (t: Token) {
+    def || (q : => Parser[List[Token], Token]) = 
+      new AltParser[List[Token], Token](t, q)
+    def ~[S](q : => Parser[List[Token], S]) = 
+      new SeqParser[List[Token], Token, S](t, q)  
 }
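+
+// Together with token2parser above, these let a bare token start a
+// combinator chain, as in  T_KEYWORD("read") ~ IdParser()  below.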
 
-implicit def TokenOps(t: Token) = new {
-    def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](List(t), q)
-    def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](List(t), qs)
-    def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](List(t), f)
-    def ~[S](q : => Parser[List[Token], S]) =
-        new SeqParser[List[Token], List[Token], S](List(t), q)
-    def ~ (qs : List[Token]) =
-        new SeqParser[List[Token], List[Token], List[Token]](List(t), qs)
-}
 
-implicit def TokenListOps(ts: List[Token]) = new {
-    def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](ts, q)
-    def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](ts, qs)
-    def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](ts, f)
-    def ~[S](q : => Parser[List[Token], S]) =
-        new SeqParser[List[Token], List[Token], S](ts, q)
-    def ~ (qs : List[Token]) =
-        new SeqParser[List[Token], List[Token], List[Token]](ts, qs)
-}
 
 // Abstract Syntax Trees
 abstract class Stmt
@@ -83,6 +98,7 @@
 case class WriteId(s: String) extends Stmt  // for printing values of variables
 case class WriteString(s: String) extends Stmt  // for printing words
 case class For(counter: String, lower: AExp, upper: AExp, code: Block) extends Stmt
+case object Break extends Stmt
 
 
 case class Var(s: String) extends AExp
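+
+// For instance, the statement  x := x + 1  is represented by the tree
+//   Assign("x", Aop("+", Var("x"), Num(1)))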
@@ -117,46 +133,51 @@
 }
 
 // WHILE Language Parsing
+
 lazy val AExp: Parser[List[Token], AExp] = 
-  (Te ~ T_OP("+") ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z): AExp } ||
-  (Te ~ T_OP("-") ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
+  (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } ||
+  (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
 lazy val Te: Parser[List[Token], AExp] = 
-  (Fa ~ T_OP("*") ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z): AExp } || 
-  (Fa ~ T_OP("/") ~ Te) ==> { case x ~ _ ~ z => Aop("/", x, z): AExp } || 
-  (Fa ~ T_OP("%") ~ Te) ==> { case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
+  (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } || 
+  (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } || 
+  (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
 lazy val Fa: Parser[List[Token], AExp] = 
-   (T_PAREN("(") ~ AExp ~ T_PAREN(")")) ==> { case _ ~ y ~ _ => y } || 
-   IdParser() ==> Var  || 
-   NumParser() ==> Num
+   (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } || 
+   IdParser().map{Var(_)}  || 
+   NumParser().map{Num(_)}
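+
+// The AExp / Te / Fa split encodes the usual precedences: for example
+// 2 + 3 * 4 parses as  Aop("+", Num(2), Aop("*", Num(3), Num(4)))
+// because Te binds *, / and % tighter than + and - in AExp.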
 
 lazy val BExp: Parser[List[Token], BExp] = 
-   (AExp ~ T_OP("==") ~ AExp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
-   (AExp ~ T_OP("!=") ~ AExp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
-   (AExp ~ T_OP("<") ~ AExp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || 
-   (AExp ~ T_OP(">") ~ AExp) ==> { case x ~ _ ~ z => Bop(">", x, z): BExp } ||
-   (T_PAREN("(") ~ BExp ~ List(T_PAREN(")"), T_OP("&&")) ~ BExp) ==> { case _ ~ y ~ _ ~ v => And(y, v): BExp } ||
-   (T_PAREN("(") ~ BExp ~ List(T_PAREN(")"), T_OP("||")) ~ BExp) ==> { case _ ~ y ~ _ ~ v => Or(y, v): BExp } ||
-   (T_KEYWORD("true") ==> (_ => True: BExp )) || 
-   (T_KEYWORD("false") ==> (_ => False: BExp )) ||
-   (T_PAREN("(") ~ BExp ~ T_PAREN(")")) ==> { case _ ~ x ~ _ => x }
+   (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
+   (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
+   (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || 
+   (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } ||
+   (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
+   (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
+   (T_KEYWORD("true").map(_ => True: BExp )) || 
+   (T_KEYWORD("false").map(_ => False: BExp )) ||
+   (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x }
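+
+// Note that && and || expect a parenthesised left operand, so for example
+// (x == 0) && y < 1  parses as
+//   And(Bop("==", Var("x"), Num(0)), Bop("<", Var("y"), Num(1)))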
 
 lazy val Stmt: Parser[List[Token], Stmt] =
-    T_KEYWORD("skip") ==> (_ => Skip: Stmt) ||
-    (IdParser() ~ T_OP(":=") ~ AExp) ==> { case id ~ _ ~ z => Assign(id, z): Stmt } ||
-    (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block) ==> { case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
-    (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
-    (T_KEYWORD("read") ~ IdParser()) ==> { case _ ~ id => Read(id): Stmt} ||
-    (T_KEYWORD("write") ~ IdParser()) ==> { case _ ~ id => WriteId(id): Stmt} ||
-    (T_KEYWORD("write") ~ StringParser()) ==> { case _ ~ s => WriteString(s): Stmt} ||
-    (T_KEYWORD("for") ~ IdParser() ~ T_OP(":=") ~ AExp ~ T_KEYWORD("upto") ~ AExp ~ T_KEYWORD("do") ~ Block) ==> {
-      case _ ~ id ~ _ ~ lower ~ _ ~ upper ~ _ ~ blck => For(id, lower, upper, blck): Stmt
-    }
+    T_KEYWORD("skip").map(_ => Skip: Stmt) ||
+    T_KEYWORD("break").map(_ => Break: Stmt) ||
+    (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } ||
+    (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
+    (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
+    (T_KEYWORD("for") ~ IdParser() ~ T_OP(":=") ~ AExp ~T_KEYWORD("upto") ~ AExp ~ T_KEYWORD("do") ~ Block).map{ 
+        case _ ~ id ~ _ ~ low ~ _ ~ high ~ _ ~ bl => For(id, low, high, bl) : Stmt } ||
+    (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} ||
+    (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} ||
+    (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} || 
+    (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
+    (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
 
 lazy val Stmts: Parser[List[Token], Block] =
-    (Stmt ~ T_SEMI ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } ||
-    (Stmt ==> (s => List(s) : Block))
+    (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } ||
+    (Stmt.map(s => List(s) : Block))
 
 lazy val Block: Parser[List[Token], Block] =
-    (T_PAREN("{") ~ Stmts ~ T_PAREN("}")) ==> { case x ~ y ~ z => y} ||
-    (Stmt ==> (s => List(s)))
+    (T_PAREN("{") ~ Stmts ~ T_PAREN("}")).map{ case x ~ y ~ z => y} ||
+    (Stmt.map(s => List(s)))
 
+
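+
+// Top-level usage (sketch): for a token list ts produced by the lexer,
+//   Stmts.parse_all(ts)
+// returns the set of complete parses of ts as a Block.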