solutions/cw3/parser.sc
changeset 919 53f08d873e09
parent 905 15973df32613
child 921 bb54e7aa1a3f
918:53e7da9f372a 919:53f08d873e09
     2 
     2 
     3 import $file.lexer
     3 import $file.lexer
     4 import lexer._ 
     4 import lexer._ 
     5 
     5 
     6 
     6 
     7 case class ~[+A, +B](_1: A, _2: B)
     7 case class ~[+A, +B](x: A, y: B)
     8 type IsSeq[A] = A => Seq[_]
     8 
     9 
     9 // parser combinators
    10 abstract class Parser[I : IsSeq, T] {
    10 
    11   def parse(ts: I): Set[(T, I)]
    11 abstract class Parser[I, T](using is: I => Seq[_])  {
    12 
    12   def parse(in: I): Set[(T, I)]  
    13   def parse_all(ts: I) : Set[T] =
    13 
    14     for ((head, tail) <- parse(ts); if tail.isEmpty) yield head
    14   def parse_all(in: I) : Set[T] =
    15 }
    15     for ((hd, tl) <- parse(in); 
    16 
    16         if is(tl).isEmpty) yield hd
    17 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
    17 }
    18   def parse(sb: I) = 
    18 
    19     for ((head1, tail1) <- p.parse(sb); 
    19 // alternative parser
    20          (head2, tail2) <- q.parse(tail1)) yield (new ~(head1, head2), tail2)
    20 class AltParser[I, T](p: => Parser[I, T], 
    21 }
    21                       q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
    22 
    22   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    23 class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] {
    23 }
    24   def parse(sb: I) = p.parse(sb) ++ q.parse(sb)   
    24 
    25 }
    25 // sequence parser
    26 
    26 class SeqParser[I, T, S](p: => Parser[I, T], 
    27 class FunParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] {
    27                          q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
    28   def parse(sb: I) = 
    28   def parse(in: I) = 
    29     for ((head, tail) <- p.parse(sb)) yield (f(head), tail)
    29     for ((hd1, tl1) <- p.parse(in); 
       
    30          (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
       
    31 }
       
    32 
       
    33 // map parser
       
    34 class MapParser[I, T, S](p: => Parser[I, T], 
       
    35                          f: T => S)(using I => Seq[_]) extends Parser[I, S] {
       
    36   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
       
    37 }
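// A rough usage sketch for the combinator classes above, assuming the
// TokenParser defined further below and the lexer's T_KEYWORD tokens:
// SeqParser pairs the two results in a ~, AltParser unions the result
// sets, and MapParser transforms the value component.
//
//   val pq = new SeqParser(TokenParser(T_KEYWORD("if")),
//                          TokenParser(T_KEYWORD("then")))
//   new MapParser(pq, (r: Token ~ Token) => "if-then")
//     .parse(List(T_KEYWORD("if"), T_KEYWORD("then")))
//   // => Set(("if-then", List()))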
       
    38 
       
    39 
       
    40 /*
       
    41 // atomic parser for (particular) strings
       
    42 case class StrParser(s: String) extends Parser[String, String] {
       
    43   def parse(sb: String) = {
       
    44     val (prefix, suffix) = sb.splitAt(s.length)
       
    45     if (prefix == s) Set((prefix, suffix)) else Set()
       
    46   }
       
    47 }
       
    48 
       
    49 extension (sc: StringContext) 
       
    50   def p(args: Any*) = StrParser(sc.s(args:_*))
       
    51 */
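// If the StrParser variant above were uncommented, parsers would work on
// raw strings instead of token lists; a sketch of the intended use of the
// p"..." interpolator:
//
//   p"if".parse("if x")            // => Set(("if", " x"))
//   (p"iff" || p"if").parse("iff") // => Set(("iff", ""), ("if", "f"))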
       
    52 
       
    53 // more convenient syntax for parser combinators
       
    54 extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
       
    55   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    56   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    57   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
    30 }
    58 }
    31 
    59 
    32 // New parser that takes as input a list of tokens
    60 // New parser that takes as input a list of tokens
       
    61 case class TokenParser(t: Token) extends Parser[List[Token], Token] {
       
    62     def parse(in: List[Token]) = {
       
    63       // an atomic parser for a single token
       
    64       if (!in.isEmpty && in.head == t) Set((t, in.tail)) else Set()
       
    65     }
       
    66 }   
       
    67 
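// A sketch combining TokenParser with the ||, ~ and map syntax from the
// extension above (token names as produced by the lexer):
//
//   val ifOrWhile = TokenParser(T_KEYWORD("if")) || TokenParser(T_KEYWORD("while"))
//   ifOrWhile.map(_ => "loop-or-branch").parse(List(T_KEYWORD("while")))
//   // => Set(("loop-or-branch", List()))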
    33 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    68 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    34     def parse(tsb: List[Token]) = {
    69     def parse(tsb: List[Token]) = {
    35         val (prefix, suffix) = tsb.splitAt(ts.length)
    70         val (prefix, suffix) = tsb.splitAt(ts.length)
    36         if (prefix == ts) Set((prefix, suffix)) else Set()
    71         if (prefix == ts) Set((prefix, suffix)) else Set()
    37     }
    72     }
    38 }
    73 }
    39 
    74 
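// TokenListParser matches an exact sequence of tokens at the start of the
// input and returns the rest; a sketch:
//
//   TokenListParser(List(T_KEYWORD("if"), T_KEYWORD("true")))
//     .parse(List(T_KEYWORD("if"), T_KEYWORD("true"), T_SEMI))
//   // => Set((List(T_KEYWORD("if"), T_KEYWORD("true")), List(T_SEMI)))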
    40 // Implicit definitions to go from a token 
    75 // Implicit definition to go from a token
    41 // or a list of tokens to a TokenListParser
    76 // to a TokenParser
    42 implicit def token2parser(t: Token) = TokenListParser(List(t))
    77 implicit def token2parser(t: Token) : Parser[List[Token], Token] = 
    43 implicit def tokenList2parser(ts: List[Token]) = TokenListParser(ts)
    78   TokenParser(t)
    44 
    79 
    45 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
    80 extension (t: Token) {
    46   def || (q : => Parser[I, T]) = new AltParser[I, T](p, q)
    81     def || (q : => Parser[List[Token], Token]) = 
    47   def ==>[S] (f: => T => S) = new FunParser[I, T, S](p, f)
    82       new AltParser[List[Token], Token](t, q)
    48   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    83     def ~[S](q : => Parser[List[Token], S]) = 
    49 }
    84       new SeqParser[List[Token], Token, S](t, q)  
    50 
    85 }
    51 implicit def TokenOps(t: Token) = new {
    86 
    52     def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](List(t), q)
       
    53     def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](List(t), qs)
       
    54     def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](List(t), f)
       
    55     def ~[S](q : => Parser[List[Token], S]) =
       
    56         new SeqParser[List[Token], List[Token], S](List(t), q)
       
    57     def ~ (qs : List[Token]) =
       
    58         new SeqParser[List[Token], List[Token], List[Token]](List(t), qs)
       
    59 }
       
    60 
       
    61 implicit def TokenListOps(ts: List[Token]) = new {
       
    62     def || (q : => Parser[List[Token], List[Token]]) = new AltParser[List[Token], List[Token]](ts, q)
       
    63     def || (qs : List[Token]) = new AltParser[List[Token], List[Token]](ts, qs)
       
    64     def ==>[S] (f: => List[Token] => S) = new FunParser[List[Token], List[Token], S](ts, f)
       
    65     def ~[S](q : => Parser[List[Token], S]) =
       
    66         new SeqParser[List[Token], List[Token], S](ts, q)
       
    67     def ~ (qs : List[Token]) =
       
    68         new SeqParser[List[Token], List[Token], List[Token]](ts, qs)
       
    69 }
       
    70 
    87 
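// With token2parser and the Token extension above, bare tokens can appear
// directly in grammar rules, as in the WHILE grammar below; for instance
// the sequence of two keyword tokens
//
//   (T_KEYWORD("while") ~ T_KEYWORD("do"))
//     .parse(List(T_KEYWORD("while"), T_KEYWORD("do")))
//
// succeeds with a single result pairing the two tokens in a ~.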
    71 // Abstract Syntax Trees
    88 // Abstract Syntax Trees
    72 abstract class Stmt
    89 abstract class Stmt
    73 abstract class AExp
    90 abstract class AExp
    74 abstract class BExp
    91 abstract class BExp
   112         case T_STRING(s) :: rest => Set((s, rest))
   129         case T_STRING(s) :: rest => Set((s, rest))
   113         case _ => Set()
   130         case _ => Set()
   114     }
   131     }
   115 }
   132 }
   116 
   133 
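// StringParser, whose final clauses are shown above, succeeds on a leading
// T_STRING token and returns its payload; a sketch:
//
//   StringParser().parse(List(T_STRING("hello"), T_SEMI))
//   // => Set(("hello", List(T_SEMI)))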
       
   134 
   117 // WHILE Language Parsing
   135 // WHILE Language Parsing
   118 lazy val AExp: Parser[List[Token], AExp] = 
   136 lazy val AExp: Parser[List[Token], AExp] = 
   119   (Te ~ T_OP("+") ~ AExp) ==> { case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   137   (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   120   (Te ~ T_OP("-") ~ AExp) ==> { case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   138   (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   121 lazy val Te: Parser[List[Token], AExp] = 
   139 lazy val Te: Parser[List[Token], AExp] = 
   122   (Fa ~ T_OP("*") ~ Te) ==> { case x ~ _ ~ z => Aop("*", x, z): AExp } || 
   140   (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } || 
   123   (Fa ~ T_OP("/") ~ Te) ==> { case x ~ _ ~ z => Aop("/", x, z): AExp } || 
   141   (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } || 
   124   (Fa ~ T_OP("%") ~ Te) ==> { case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
   142   (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
   125 lazy val Fa: Parser[List[Token], AExp] = 
   143 lazy val Fa: Parser[List[Token], AExp] = 
   126    (T_PAREN("(") ~ AExp ~ T_PAREN(")")) ==> { case _ ~ y ~ _ => y } || 
   144    (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } || 
   127    IdParser() ==> Var  || 
   145    IdParser().map{Var(_)}  || 
   128    NumParser() ==> Num
   146    NumParser().map{Num(_)}
   129 
   147 
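// The three rules AExp / Te / Fa encode the usual precedences: + and -
// bind weakest, * / % bind tighter, and Fa covers atoms and parentheses.
// A sketch, using tokenise from the lexer (as in the tests below):
//
//   println(AExp.parse_all(tokenise("2 + 3 * 4")))
//   // => roughly Set(Aop("+", Num(2), Aop("*", Num(3), Num(4))))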
   130 lazy val BExp: Parser[List[Token], BExp] = 
   148 lazy val BExp: Parser[List[Token], BExp] = 
   131    (AExp ~ T_OP("==") ~ AExp) ==> { case x ~ _ ~ z => Bop("==", x, z): BExp } || 
   149    (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
   132    (AExp ~ T_OP("!=") ~ AExp) ==> { case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
   150    (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
   133    (AExp ~ T_OP("<") ~ AExp) ==> { case x ~ _ ~ z => Bop("<", x, z): BExp } || 
   151    (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || 
   134    (AExp ~ T_OP(">") ~ AExp) ==> { case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   152    (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   135    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp) ==> { case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
   153    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
   136    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp) ==> { case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
   154    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
   137    (T_KEYWORD("true") ==> (_ => True: BExp )) || 
   155    (T_KEYWORD("true").map(_ => True: BExp )) || 
   138    (T_KEYWORD("false") ==> (_ => False: BExp )) ||
   156    (T_KEYWORD("false").map(_ => False: BExp )) ||
   139    (T_PAREN("(") ~ BExp ~ T_PAREN(")")) ==> { case _ ~ x ~ _ => x }
   157    (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x }
   140 
   158 
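// Note that && and || require their left operand to be parenthesised in
// this grammar. A sketch:
//
//   println(BExp.parse_all(tokenise("2 < 3")))
//   // => roughly Set(Bop("<", Num(2), Num(3)))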
   141 lazy val Stmt: Parser[List[Token], Stmt] =
   159 lazy val Stmt: Parser[List[Token], Stmt] =
   142     T_KEYWORD("skip") ==> (_ => Skip: Stmt) ||
   160     T_KEYWORD("skip").map(_ => Skip: Stmt) ||
   143     (IdParser() ~ T_OP(":=") ~ AExp) ==> { case id ~ _ ~ z => Assign(id, z): Stmt } ||
   161     (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } ||
   144     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block) ==> { case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   162     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   145     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block) ==> { case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   163     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   146     (T_KEYWORD("read") ~ IdParser()) ==> { case _ ~ id => Read(id): Stmt} ||
   164     (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} ||
   147     (T_KEYWORD("write") ~ IdParser()) ==> { case _ ~ id => WriteId(id): Stmt} ||
   165     (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} ||
   148     (T_KEYWORD("write") ~ StringParser()) ==> { case _ ~ s => WriteString(s): Stmt} || 
   166     (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} || 
   149     (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")) ==> { case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
   167     (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
   150     (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")) ==> { case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
   168     (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
   151 
   169 
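// Single statements, e.g. assignments, parse as follows; a sketch:
//
//   println(Stmt.parse_all(tokenise("x := x + 1")))
//   // => roughly Set(Assign("x", Aop("+", Var("x"), Num(1))))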
   152 lazy val Stmts: Parser[List[Token], Block] =
   170 lazy val Stmts: Parser[List[Token], Block] =
   153     (Stmt ~ T_SEMI ~ Stmts) ==> { case x ~ _ ~ z => x :: z : Block } ||
   171     (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } ||
   154     (Stmt ==> (s => List(s) : Block))
   172     (Stmt.map(s => List(s) : Block))
   155 
   173 
   156 lazy val Block: Parser[List[Token], Block] =
   174 lazy val Block: Parser[List[Token], Block] =
   157     (T_PAREN("{") ~ Stmts ~ T_PAREN("}")) ==> { case x ~ y ~ z => y} ||
   175     (T_PAREN("{") ~ Stmts ~ T_PAREN("}")).map{ case _ ~ y ~ _ => y} ||
   158     (Stmt ==> (s => List(s)))
   176     (Stmt.map(s => List(s)))
   159 
   177 
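// Stmts builds semicolon-separated statement lists and Block allows them
// to be wrapped in braces; a sketch:
//
//   println(Stmts.parse_all(tokenise("x := 1; skip")))
//   // => roughly Set(List(Assign("x", Num(1)), Skip))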
   160 // Testing with programs 2 & 3
   178 // Testing with programs 2 & 3
   161 
   179 
   162 //println("Fibonacci")
   180 //println("Fibonacci")
   163 //println(Stmts.parse_all(tokenise(os.read(os.pwd / "fib.while"))))
   181 //println(Stmts.parse_all(tokenise(os.read(os.pwd / "fib.while"))))