solutions/cw3/parser.sc
changeset 961 c0600f8b6427
parent 959 64ec1884d860
child 974 0cb4bf2469d1
equal deleted inserted replaced
960:c7009356ddd8 961:c0600f8b6427
     1 // CW3
     1 // CW3
     2 
     2 
     3 import $file.lexer
     3 //> using file project.scala
     4 import lexer._ 
     4 //> using file lexer.sc
     5 
     5 import lexer.*
     6 
     6 
     7 case class ~[+A, +B](x: A, y: B)
     7 case class ~[+A, +B](x: A, y: B)
     8 
     8 
     9 // parser combinators
     9 // parser combinators
    10 
    10 //
    11 abstract class Parser[I, T](using is: I => Seq[_])  {
    11 type IsSeq[I] = I => Seq[?]
    12   def parse(in: I): Set[(T, I)]  
    12 
       
    13 abstract class Parser[I : IsSeq, T](using is: I => Seq[?])  {
       
    14   def parse(in: I): Set[(T, I)]
    13 
    15 
    14   def parse_all(in: I) : Set[T] =
    16   def parse_all(in: I) : Set[T] =
    15     for ((hd, tl) <- parse(in); 
    17     for ((hd, tl) <- parse(in);
    16         if is(tl).isEmpty) yield hd
    18         if is(tl).isEmpty) yield hd
    17 }
    19 }
    18 
    20 
       
    21 // parser combinators
       
    22 
    19 // alternative parser
    23 // alternative parser
    20 class AltParser[I, T](p: => Parser[I, T], 
    24 class AltParser[I : IsSeq, T](p: => Parser[I, T],
    21                       q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
    25                               q: => Parser[I, T]) extends Parser[I, T] {
    22   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    26   def parse(in: I) = p.parse(in) ++ q.parse(in)
    23 }
    27 }
    24 
    28 
    25 // sequence parser
    29 // sequence parser
    26 class SeqParser[I, T, S](p: => Parser[I, T], 
    30 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],
    27                          q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] {
    31                                  q: => Parser[I, S]) extends Parser[I, ~[T, S]] {
    28   def parse(in: I) = 
    32   def parse(in: I) =
    29     for ((hd1, tl1) <- p.parse(in); 
    33     for ((hd1, tl1) <- p.parse(in);
    30          (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
    34          (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
    31 }
    35 }
    32 
    36 
    33 // map parser
    37 // map parser
    34 class MapParser[I, T, S](p: => Parser[I, T], 
    38 class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
    35                          f: T => S)(using I => Seq[_]) extends Parser[I, S] {
    39                                  f: T => S) extends Parser[I, S] {
    36   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    40   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    37 }
    41 }
       
    42 
    38 
    43 
    39 
    44 
    40 /*
    45 /*
    41 // atomic parser for (particular) strings
    46 // atomic parser for (particular) strings
    42 case class StrParser(s: String) extends Parser[String, String] {
    47 case class StrParser(s: String) extends Parser[String, String] {
    44     val (prefix, suffix) = sb.splitAt(s.length)
    49     val (prefix, suffix) = sb.splitAt(s.length)
    45     if (prefix == s) Set((prefix, suffix)) else Set()
    50     if (prefix == s) Set((prefix, suffix)) else Set()
    46   }
    51   }
    47 }
    52 }
    48 
    53 
    49 extension (sc: StringContext) 
    54 extension (sc: StringContext)
    50   def p(args: Any*) = StrParser(sc.s(args:_*))
    55   def p(args: Any*) = StrParser(sc.s(args:_*))
    51 */
    56 */
    52 
    57 
    53 // more convenient syntax for parser combinators
    58 // more convenient syntax for parser combinators
    54 extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
    59 extension [I : IsSeq, T](p: Parser[I, T]) {
    55   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    60   def ||(q : => Parser[I, T]) = AltParser[I, T](p, q)
    56   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    61   def ~[S] (q : => Parser[I, S]) = SeqParser[I, T, S](p, q)
    57   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
    62   def map[S](f: => T => S) = MapParser[I, T, S](p, f)
    58 }
    63 }
    59 
    64 
    60 // New parser that takes as input a list of tokens
    65 // New parser that takes as input a list of tokens
    61 case class TokenParser(t: Token) extends Parser[List[Token], Token] {
    66 case class TokenParser(t: Token) extends Parser[List[Token], Token] {
    62     def parse(in: List[Token]) = {
    67     def parse(in: List[Token]) = {
    63       // an example of an atomic parser for characters
    68       // an example of an atomic parser for characters
    64       if (!in.isEmpty && in.head == t) Set((t, in.tail)) else Set()
    69       if (!in.isEmpty && in.head == t) Set((t, in.tail)) else Set()
    65     }
    70     }
    66 }   
    71 }
    67 
    72 
    68 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    73 case class TokenListParser(ts: List[Token]) extends Parser[List[Token], List[Token]] {
    69     def parse(tsb: List[Token]) = {
    74     def parse(tsb: List[Token]) = {
    70         val (prefix, suffix) = tsb.splitAt(ts.length)
    75         val (prefix, suffix) = tsb.splitAt(ts.length)
    71         if (prefix == ts) Set((prefix, suffix)) else Set()
    76         if (prefix == ts) Set((prefix, suffix)) else Set()
    72     }
    77     }
    73 }
    78 }
    74 
    79 
    75 // Implicit definitions to go from a token 
    80 // Implicit definitions to go from a token
    76 // or a list of tokens to a TokenListParser
    81 // or a list of tokens to a TokenListParser
    77 implicit def token2parser(t: Token) : Parser[List[Token], Token] = 
    82 implicit def token2parser(t: Token) : Parser[List[Token], Token] =
    78   TokenParser(t)
    83   TokenParser(t)
    79 
    84 
    80 extension (t: Token) {
    85 extension (t: Token) {
    81     def || (q : => Parser[List[Token], Token]) = 
    86     def || (q : => Parser[List[Token], Token]) = AltParser[List[Token], Token](t, q)
    82       new AltParser[List[Token], Token](t, q)
    87     def ~[S](q : => Parser[List[Token], S]) = SeqParser[List[Token], Token, S](t, q)
    83     def ~[S](q : => Parser[List[Token], S]) = 
       
    84       new SeqParser[List[Token], Token, S](t, q)  
       
    85 }
    88 }
    86 
    89 
    87 
    90 
    88 // Abstract Syntax Trees
    91 // Abstract Syntax Trees
    89 abstract class Stmt
    92 abstract class Stmt
   131     }
   134     }
   132 }
   135 }
   133 
   136 
   134 
   137 
   135 // WHILE Language Parsing
   138 // WHILE Language Parsing
   136 lazy val AExp: Parser[List[Token], AExp] = 
   139 lazy val AExp: Parser[List[Token], AExp] =
   137   (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   140   (Te ~ T_OP("+") ~ AExp).map{ case x ~ _ ~ z => Aop("+", x, z): AExp } ||
   138   (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   141   (Te ~ T_OP("-") ~ AExp).map{ case x ~ _ ~ z => Aop("-", x, z): AExp } || Te
   139 lazy val Te: Parser[List[Token], AExp] = 
   142 lazy val Te: Parser[List[Token], AExp] =
   140   (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } || 
   143   (Fa ~ T_OP("*") ~ Te).map{ case x ~ _ ~ z => Aop("*", x, z): AExp } ||
   141   (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } || 
   144   (Fa ~ T_OP("/") ~ Te).map{ case x ~ _ ~ z => Aop("/", x, z): AExp } ||
   142   (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa  
   145   (Fa ~ T_OP("%") ~ Te).map{ case x ~ _ ~ z => Aop("%", x, z): AExp } || Fa
   143 lazy val Fa: Parser[List[Token], AExp] = 
   146 lazy val Fa: Parser[List[Token], AExp] =
   144    (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } || 
   147    (T_PAREN("(") ~ AExp ~ T_PAREN(")")).map{ case _ ~ y ~ _ => y } ||
   145    IdParser().map{Var(_)}  || 
   148    IdParser().map{Var(_)}  ||
   146    NumParser().map{Num(_)}
   149    NumParser().map{Num(_)}
   147 
   150 
   148 lazy val BExp: Parser[List[Token], BExp] = 
   151 lazy val BExp: Parser[List[Token], BExp] =
   149    (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } || 
   152    (AExp ~ T_OP("==") ~ AExp).map{ case x ~ _ ~ z => Bop("==", x, z): BExp } ||
   150    (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } || 
   153    (AExp ~ T_OP("!=") ~ AExp).map{ case x ~ _ ~ z => Bop("!=", x, z): BExp } ||
   151    (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } || 
   154    (AExp ~ T_OP("<") ~ AExp).map{ case x ~ _ ~ z => Bop("<", x, z): BExp } ||
   152    (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   155    (AExp ~ T_OP(">") ~ AExp).map{ case x ~ _ ~ z => Bop(">", x, z): BExp } ||
   153    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
   156    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("&&") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => And(y, v): BExp } ||
   154    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
   157    (T_PAREN("(") ~ BExp ~ T_PAREN(")") ~ T_OP("||") ~ BExp).map{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v): BExp } ||
   155    (T_KEYWORD("true").map(_ => True: BExp )) || 
   158    (T_KEYWORD("true").map(_ => True: BExp )) ||
   156    (T_KEYWORD("false").map(_ => False: BExp )) ||
   159    (T_KEYWORD("false").map(_ => False: BExp )) ||
   157    (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x }
   160    (T_PAREN("(") ~ BExp ~ T_PAREN(")")).map{ case _ ~ x ~ _ => x }
   158 
   161 
   159 lazy val Stmt: Parser[List[Token], Stmt] =
   162 lazy val Stmt: Parser[List[Token], Stmt] =
   160     T_KEYWORD("skip").map(_ => Skip: Stmt) ||
   163     T_KEYWORD("skip").map(_ => Skip: Stmt) ||
   161     (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } ||
   164     (IdParser() ~ T_OP(":=") ~ AExp).map{ case id ~ _ ~ z => Assign(id, z): Stmt } ||
   162     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   165     (T_KEYWORD("if") ~ BExp ~ T_KEYWORD("then") ~ Block ~ T_KEYWORD("else") ~ Block).map{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w): Stmt } ||
   163     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   166     (T_KEYWORD("while") ~ BExp ~ T_KEYWORD("do") ~ Block).map{ case _ ~ y ~ _ ~ w => While(y, w) : Stmt } ||
   164     (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} ||
   167     (T_KEYWORD("read") ~ IdParser()).map{ case _ ~ id => Read(id): Stmt} ||
   165     (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} ||
   168     (T_KEYWORD("write") ~ IdParser()).map{ case _ ~ id => WriteId(id): Stmt} ||
   166     (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} || 
   169     (T_KEYWORD("write") ~ StringParser()).map{ case _ ~ s => WriteString(s): Stmt} ||
   167     (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
   170     (T_KEYWORD("write") ~ T_PAREN("(") ~ IdParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ id ~ _ => WriteId(id): Stmt} ||
   168     (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
   171     (T_KEYWORD("write") ~  T_PAREN("(") ~ StringParser() ~ T_PAREN(")")).map{ case _ ~ _ ~ s ~ _ => WriteString(s): Stmt}
   169 
   172 
   170 lazy val Stmts: Parser[List[Token], Block] =
   173 lazy val Stmts: Parser[List[Token], Block] =
   171     (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } ||
   174     (Stmt ~ T_SEMI ~ Stmts).map{ case x ~ _ ~ z => x :: z : Block } ||
   217 import scala.io.StdIn.readInt
   220 import scala.io.StdIn.readInt
   218 
   221 
   219 def eval_stmt(s: Stmt, env: Env) : Env = s match {
   222 def eval_stmt(s: Stmt, env: Env) : Env = s match {
   220   case Skip => env
   223   case Skip => env
   221   case Assign(x, a) => env + (x -> eval_aexp(a, env))
   224   case Assign(x, a) => env + (x -> eval_aexp(a, env))
   222   case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) 
   225   case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env)
   223   case While(b, bl) => 
   226   case While(b, bl) =>
   224     if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
   227     if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
   225     else env
   228     else env
   226 
   229 
   227   case WriteId(x) => { print(env(x)) ; env }
   230   case WriteId(x) => { print(env(x)) ; env }
   228   case WriteString(x) => {
   231   case WriteString(x) => {
   229         print(x.replaceAll("\"", "").replaceAll("""\\n""", "\n")) ;
   232         print(x.replaceAll("\"", "").replaceAll("""\\n""", "\n")) ;
   230         env
   233         env
   231        }
   234        }
   232 
   235 
   233   case Read(x) => { 
   236   case Read(x) => {
   234       println("Enter an integer and press ENTER:") ; 
   237       println("Enter an integer and press ENTER:") ;
   235       val n = readInt() ; // Note: Does not work when using the REPL
   238       val n = readInt() ; // Note: Does not work when using the REPL
   236       eval_stmt(Assign(x, Num(n)), env) 
   239       eval_stmt(Assign(x, Num(n)), env)
   237   }
   240   }
   238 }
   241 }
   239 
   242 
   240 def eval_bl(bl: Block, env: Env) : Env = bl match {
   243 def eval_bl(bl: Block, env: Env) : Env = bl match {
   241   case Nil => env
   244   case Nil => env
   249 
   252 
   250 //println(eval(Stmts.parse_all(tokenise(os.read(os.pwd / "primes.while"))).head))
   253 //println(eval(Stmts.parse_all(tokenise(os.read(os.pwd / "primes.while"))).head))
   251 
   254 
   252 
   255 
   253 
   256 
   254 @main
   257 //@main
   255 def run(file: String) = {
   258 def run(file: String) = {
   256   val contents = os.read(os.pwd / file)
   259   val contents = os.read(os.pwd / file)
   257   println(s"Lex $file: ")
   260   println(s"Lex $file: ")
   258   println(tokenise(contents))
   261   println(tokenise(contents))
   259   println(s"Parse $file: ")
   262   println(s"Parse $file: ")
   260   println(Stmts.parse_all(tokenise(contents)).head)
   263   println(Stmts.parse_all(tokenise(contents)).head)
   261   println(s"Eval $file: ")
   264   println(s"Eval $file: ")
   262   println(eval(Stmts.parse_all(tokenise(contents)).head))
   265   println(eval(Stmts.parse_all(tokenise(contents)).head))
   263 }
   266 }
   264 
   267 
   265 @main
   268 //@main
   266 def test(file: String) = {
   269 def test(file: String) = {
   267   val contents = os.read(os.pwd / file)
   270   val contents = os.read(os.pwd / file)
   268   println(s"Lex $file: ")
   271   println(s"Lex $file: ")
   269   println(tokenise(contents))
   272   println(tokenise(contents))
   270   println(s"Parse $file: ")
   273   println(s"Parse $file: ")
   277 println(eval(Stmts.parse_all(tokenise(os.read(os.pwd / "primes.while"))).head))
   280 println(eval(Stmts.parse_all(tokenise(os.read(os.pwd / "primes.while"))).head))
   278 val end = System.nanoTime()
   281 val end = System.nanoTime()
   279 println("Time taken in seconds: ")
   282 println("Time taken in seconds: ")
   280 println((end - start)/(1.0e9))
   283 println((end - start)/(1.0e9))
   281 */
   284 */
       
   285 
       
   286 test("primes.while")
       
   287 run("primes.while")