solutions/cw4/parser.sc
changeset 920 7af2eea19646
parent 894 02ef5c3abc51
child 959 64ec1884d860
equal deleted inserted replaced
919:53f08d873e09 920:7af2eea19646
     1 // CW3
     1 // CW3
     2 
     2 
     3 import $file.lexer
     3 import $file.lexer
     4 import lexer._ 
     4 import lexer._ 
     5 
     5 
       
     6 case class ~[+A, +B](x: A, y: B)
     6 
     7 
     7 case class ~[+A, +B](_1: A, _2: B)
     8 // parser combinators
     8 type IsSeq[A] = A => Seq[_]
       
     9 
     9 
    10 abstract class Parser[I : IsSeq, T] {
    10 abstract class Parser[I, T](using is: I => Seq[_])  {
    11   def parse(ts: I): Set[(T, I)]
    11   def parse(in: I): Set[(T, I)]  
    12 
    12 
    13   def parse_all(ts: I) : Set[T] =
    13   def parse_all(in: I) : Set[T] =
    14     for ((head, tail) <- parse(ts); if tail.isEmpty) yield head
    14     for ((hd, tl) <- parse(in); 
       
    15         if is(tl).isEmpty) yield hd
    15 }
    16 }
    16 
    17 
    17 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
    18 // alternative parser
    18   def parse(sb: I) = 
    19 class AltParser[I, T](p: => Parser[I, T], 
    19     for ((head1, tail1) <- p.parse(sb); 
    20                       q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
    20          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
    21   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    21 }
    22 }
    22 
    23 
    23 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
    24 // sequence parser
    24   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
    25 class SeqParser[I, T, S](p: => Parser[I, T], 
       
    26                          q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
       
    27   def parse(in: I) = 
       
    28     for ((hd1, tl1) <- p.parse(in); 
       
    29          (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
    25 }
    30 }
    26 
    31 
    27 class FunParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
    32 // map parser
    28   def parse(sb: I) = 
    33 class MapParser[I, T, S](p: => Parser[I, T], 
    29     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
    34                          f: T => S)(using I => Seq[_]) extends Parser[I, S] {
       
    35   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    30 }
    36 }
    31 
    37 
    32 // New parser that takes as input a list of tokens
    38 // more convenient syntax for parser combinators
       
    39 extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
       
    40   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    41   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    42   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
       
    43 }
       
    44 
       
    45 /*
       
    46 // atomic parser for (particular) strings
       
    47 case class StrParser(s: String) extends Parser[String, String] {
       
    48   def parse(sb: String) = {
       
    49     val (prefix, suffix) = sb.splitAt(s.length)
       
    50     if (prefix == s) Set((prefix, suffix)) else Set()
       
    51   }
       
    52 }
       
    53 
       
    54 extension (sc: StringContext) 
       
    55   def p(args: Any*) = StrParser(sc.s(args:_*))
       
    56 */
       
    57 
       
    58 case class TokenParser(t: Token) extends Parser[List[Token], Token] {
       
    59     def parse(in: List[Token]) = {
       
    60       // an example of an atomic parser for characters
       
    61       if (!in.isEmpty && in.head == t) Set((t, in.tail)) else Set()
       
    62     }
       
    63 }   
       
    64 
    33 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    65 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    34     def parse(tsb: List[Token]) = {
    66     def parse(tsb: List[Token]) = {
    35         val (prefix, suffix) = tsb.splitAt(ts.length)
    67         val (prefix, suffix) = tsb.splitAt(ts.length)
    36         if (prefix == ts) Set((prefix, suffix)) else Set()
    68         if (prefix == ts) Set((prefix, suffix)) else Set()
    37     }
    69     }
    38 }
    70 }
    39 
    71 
    40 // Implicit definitions to go from a token 
    72 // Implicit definitions to go from a token 
    41 // or a list of tokens to a TokenListParser
    73 // or a list of tokens to a TokenListParser
    42 implicit def token2parser(t: Token) = TokenListParser(List(t))
    74 implicit def token2parser(t: Token) : Parser[List[Token], Token] = 
    43 implicit def tokenList2parser(ts: List[Token]) = TokenListParser(ts)
    75   TokenParser(t)
    44 
    76 
    45 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
    77 extension (t: Token) {
    46   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
    78     def || (q : => Parser[List[Token], Token]) = 
    47   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
    79       new AltParser[List[Token], Token](t, q)
    48   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    80     def ~[S](q : => Parser[List[Token], S]) = 
       
    81       new SeqParser[List[Token], Token, S](t, q)  
    49 }
    82 }
    50 
    83 
    51 implicit def TokenOps(t: Token) = new {
       
    52     def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](List(t), q)
       
    53     def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](List(t), qs)
       
    54     def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](List(t), f)
       
    55     def ~[S](q : => Parser[List[Token], S]) =
       
    56         new SeqParser[List[Token], List[Token], S](List(t), q)
       
    57     def ~ (qs : List[Token]) =
       
    58         new SeqParser[List[Token], List[Token], List[Token]](List(t), qs)
       
    59 }
       
    60 
    84 
    61 implicit def TokenListOps(ts: List[Token]) = new {
       
    62     def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](ts, q)
       
    63     def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](ts, qs)
       
    64     def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](ts, f)
       
    65     def ~[S](q : => Parser[List[Token], S]) =
       
    66         new SeqParser[List[Token], List[Token], S](ts, q)
       
    67     def ~ (qs : List[Token]) =
       
    68         new SeqParser[List[Token], List[Token], List[Token]](ts, qs)
       
    69 }
       
    70 
    85 
    71 // Abstract Syntax Trees
    86 // Abstract Syntax Trees
    72 abstract class Stmt
    87 abstract class Stmt
    73 abstract class AExp
    88 abstract class AExp
    74 abstract class BExp
    89 abstract class BExp
    81 case class Assign(s: String, a: AExp) extends Stmt
    96 case class Assign(s: String, a: AExp) extends Stmt
    82 case class Read(s: String) extends Stmt
    97 case class Read(s: String) extends Stmt
    83 case class WriteId(s: String) extends Stmt  // for printing values of variables
    98 case class WriteId(s: String) extends Stmt  // for printing values of variables
    84 case class WriteString(s: String) extends Stmt  // for printing words
    99 case class WriteString(s: String) extends Stmt  // for printing words
    85 case class For(counter: String, lower: AExp, upper: AExp, code: Block) extends Stmt
   100 case class For(counter: String, lower: AExp, upper: AExp, code: Block) extends Stmt
       
   101 case object Break extends Stmt
    86 
   102 
    87 
   103 
    88 case class Var(s: String) extends AExp
   104 case class Var(s: String) extends AExp
    89 case class Num(i: Int) extends AExp
   105 case class Num(i: Int) extends AExp
    90 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
   106 case class Aop(o: String, a1: AExp, a2: AExp) extends AExp
   115         case _ => Set()
   131         case _ => Set()
   116     }
   132     }
   117 }
   133 }
   118 
   134 
   119 // WHILE Language Parsing
   135 // WHILE Language Parsing
       
   136 
       
   137 // WHILE Language Parsing
   120 lazy val AExp: Parser[List[Token], AExp] = 
   138 lazy val AExp: Parser[List[Token], AExp] = 
   121   (Te ~ T_OP("+") ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   139   (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   122   (Te ~ T_OP("-") ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   140   (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   123 lazy val Te: Parser[List[Token], AExp] = 
   141 lazy val Te: Parser[List[Token], AExp] = 
   124   (Fa ~ T_OP("*") ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z): AExp } || 
   142   (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } || 
   125   (Fa ~ T_OP("/") ~ Te) ==> { case x ~ _ ~ z => Aop("/", x, z): AExp } || 
   143   (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } || 
   126   (Fa ~ T_OP("%") ~ Te) ==> { case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
   144   (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
   127 lazy val Fa: Parser[List[Token], AExp] = 
   145 lazy val Fa: Parser[List[Token], AExp] = 
   128    (T_PAREN("(") ~ AExp ~ T_PAREN(")")) ==> { case _ ~ y ~ _ => y } || 
   146    (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } || 
   129    IdParser() ==> Var  || 
   147    IdParser().map{Var(_)}  || 
   130    NumParser() ==> Num
   148    NumParser().map{Num(_)}
   131 
   149 
   132 lazy val BExp: Parser[List[Token], BExp] = 
   150 lazy val BExp: Parser[List[Token], BExp] = 
   133    (AExp ~ T_OP("==") ~ AExp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
   151    (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
   134    (AExp ~ T_OP("!=") ~ AExp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
   152    (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
   135    (AExp ~ T_OP("<") ~ AExp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || 
   153    (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || 
   136    (AExp ~ T_OP(">") ~ AExp) ==> { case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   154    (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   137    (T_PAREN("(") ~ BExp ~ List(T_PAREN(")"), T_OP("&&")) ~ BExp) ==> { case _ ~ y ~ _ ~ v => And(y, v): BExp } ||
   155    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
   138    (T_PAREN("(") ~ BExp ~ List(T_PAREN(")"), T_OP("||")) ~ BExp) ==> { case _ ~ y ~ _ ~ v => Or(y, v): BExp } ||
   156    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
   139    (T_KEYWORD("true") ==> (_ => True: BExp )) || 
   157    (T_KEYWORD("true").map(_ => True: BExp )) || 
   140    (T_KEYWORD("false") ==> (_ => False: BExp )) ||
   158    (T_KEYWORD("false").map(_ => False: BExp )) ||
   141    (T_PAREN("(") ~ BExp ~ T_PAREN(")")) ==> { case _ ~ x ~ _ => x }
   159    (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x }
   142 
   160 
   143 lazy val Stmt: Parser[List[Token], Stmt] =
   161 lazy val Stmt: Parser[List[Token], Stmt] =
   144     T_KEYWORD("skip") ==> (_ => Skip: Stmt) ||
   162     T_KEYWORD("skip").map(_ => Skip: Stmt) ||
   145     (IdParser() ~ T_OP(":=") ~ AExp) ==> { case id ~ _ ~ z => Assign(id, z): Stmt } ||
   163     T_KEYWORD("break").map(_ => Break: Stmt) ||
   146     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block) ==> { case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   164     (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } ||
   147     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   165     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   148     (T_KEYWORD("read") ~ IdParser()) ==> { case _ ~ id => Read(id): Stmt} ||
   166     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   149     (T_KEYWORD("write") ~ IdParser()) ==> { case _ ~ id => WriteId(id): Stmt} ||
   167     (T_KEYWORD("for") ~ IdParser() ~ T_OP(":=") ~ AExp ~T_KEYWORD("upto") ~ AExp ~ T_KEYWORD("do") ~ Block).map{ 
   150     (T_KEYWORD("write") ~ StringParser()) ==> { case _ ~ s => WriteString(s): Stmt} ||
   168         case _ ~ id ~ _ ~ low ~ _ ~ high ~ _ ~ bl => For(id, low, high, bl) : Stmt } ||
   151     (T_KEYWORD("for") ~ IdParser() ~ T_OP(":=") ~ AExp ~ T_KEYWORD("upto") ~ AExp ~ T_KEYWORD("do") ~ Block) ==> {
   169     (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} ||
   152       case _ ~ id ~ _ ~ lower ~ _ ~ upper ~ _ ~ blck => For(id, lower, upper, blck): Stmt
   170     (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} ||
   153     }
   171     (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} || 
       
   172     (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
       
   173     (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
   154 
   174 
   155 lazy val Stmts: Parser[List[Token], Block] =
   175 lazy val Stmts: Parser[List[Token], Block] =
   156     (Stmt ~ T_SEMI ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } ||
   176     (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } ||
   157     (Stmt ==> (s => List(s) : Block))
   177     (Stmt.map(s => List(s) : Block))
   158 
   178 
   159 lazy val Block: Parser[List[Token], Block] =
   179 lazy val Block: Parser[List[Token], Block] =
   160     (T_PAREN("{") ~ Stmts ~ T_PAREN("}")) ==> { case x ~ y ~ z => y} ||
   180     (T_PAREN("{") ~ Stmts ~ T_PAREN("}")).map{ case x ~ y ~ z => y} ||
   161     (Stmt ==> (s => List(s)))
   181     (Stmt.map(s => List(s)))
   162 
   182 
       
   183