progs/parser-combinators/comb2.sc
changeset 971 51e00f223792
parent 965 94f5cce73a4f
child 972 ebb4a40d9bae
equal deleted inserted replaced
970:1d4659dd83fe 971:51e00f223792
     7 //
     7 //
     8 // call with
     8 // call with
     9 //
     9 //
    10 //    amm comb2.sc
    10 //    amm comb2.sc
    11 
    11 
       
    12 
    12 // more convenience for the map parsers later on;
    13 // more convenience for the map parsers later on;
    13 // it allows writing nested patterns as
    14 // it allows writing nested patterns as
    14 // case x ~ y ~ z => ...
    15 // case x ~ y ~ z => ...
    15 
    16 
    16 
    17 case class ~[+A, +B](x: A, y: B)
    17 case class o[+A, +B](x: A, y: B)
    18 
    18 
    19 
    19 
    20 // parser combinators
    20 type IsSeq[I] = I => Seq[?]
    21 abstract class Parser[I, T] { p =>
    21 
       
    22 abstract class Parser[I, T](using is: I => Seq[?])  {
       
    23   def parse(in: I): Set[(T, I)]  
    22   def parse(in: I): Set[(T, I)]  
    24 
    23 
    25   def parse_all(in: I) : Set[T] =
    24   def parse_all(in: I)(using toSeq: I => Seq[?]) : Set[T] =
    26     for ((hd, tl) <- parse(in); 
    25     for ((hd, tl) <- parse(in); 
    27         if is(tl).isEmpty) yield hd
    26         if toSeq(tl).isEmpty) yield hd
    28 }
    27 
    29 
    28   // alternative parser
    30 // parser combinators
    29   def ||(q: => Parser[I, T]) = new Parser[I, T] {
    31 
    30     def parse(in: I) = p.parse(in) ++ q.parse(in)
    32 // alternative parser
    31   }
    33 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
    32 
    34                               q: => Parser[I, T]) extends Parser[I, T] {
    33   // sequence parser
    35   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    34   def ~[S](q: => Parser[I, S]) = new Parser[I, T ~ S] {
    36 }
    35     def parse(in: I) = 
    37 
    36       for ((hd1, tl1) <- p.parse(in); 
    38 // sequence parser
    37           (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2)
    39 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
    38   }
    40                                  q: => Parser[I, S]) extends Parser[I, o[T, S]] {
    39 
    41   def parse(in: I) = 
    40   // map parser
    42     for ((hd1, tl1) <- p.parse(in); 
    41   def map[S](f: => T => S) = new Parser[I, S] {
    43          (hd2, tl2) <- q.parse(tl1)) yield (new o(hd1, hd2), tl2)
    42     def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    44 }
    43   }
    45 
    44 }
    46 // map parser
       
    47 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    48                                 f: T => S) extends Parser[I, S] {
       
    49   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
       
    50 }
       
    51 
       
    52 
       
    53 
    45 
    54 // atomic parser for (particular) strings
    46 // atomic parser for (particular) strings
    55 case class StrParser(s: String) extends Parser[String, String] {
    47 case class StrParser(s: String) extends Parser[String, String] {
    56   def parse(sb: String) = {
    48   def parse(sb: String) = {
    57     val (prefix, suffix) = sb.splitAt(s.length)
    49     val (prefix, suffix) = sb.splitAt(s.length)
    65   def parse(sb: String) = reg.findPrefixOf(sb) match {
    57   def parse(sb: String) = reg.findPrefixOf(sb) match {
    66     case None => Set()
    58     case None => Set()
    67     case Some(s) => Set(sb.splitAt(s.length))
    59     case Some(s) => Set(sb.splitAt(s.length))
    68   }
    60   }
    69 }
    61 }
    70 
       
    71 
    62 
    72 // atomic parser for numbers (transformed into Ints)
    63 // atomic parser for numbers (transformed into Ints)
    73 case object NumParser extends Parser[String, Int] {
    64 case object NumParser extends Parser[String, Int] {
    74   val reg = "[0-9]+".r
    65   val reg = "[0-9]+".r
    75   def parse(sb: String) = reg.findPrefixOf(sb) match {
    66   def parse(sb: String) = reg.findPrefixOf(sb) match {
    87 // p"<_some_string_>" 
    78 // p"<_some_string_>" 
    88 
    79 
    89 extension (sc: StringContext) 
    80 extension (sc: StringContext) 
    90   def p(args: Any*) = StrParser(sc.s(args*))
    81   def p(args: Any*) = StrParser(sc.s(args*))
    91 
    82 
    92 
       
    93 // more convenient syntax for parser combinators
       
    94 extension [I : IsSeq, T](p: Parser[I, T]) {
       
    95   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    96   def o[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    97   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
       
    98 }
       
    99 
       
   100 
       
   101 // the abstract syntax trees for the WHILE language
    83 // the abstract syntax trees for the WHILE language
   102 abstract class Stmt
    84 abstract class Stmt
   103 abstract class AExp
    85 abstract class AExp
   104 abstract class BExp 
    86 abstract class BExp 
   105 
    87 
   122 case class Or(b1: BExp, b2: BExp) extends BExp
   104 case class Or(b1: BExp, b2: BExp) extends BExp
   123 
   105 
   124 
   106 
   125 // arithmetic expressions
   107 // arithmetic expressions
   126 lazy val AExp: Parser[String, AExp] = 
   108 lazy val AExp: Parser[String, AExp] = 
   127   (Te o p"+" o AExp).map[AExp]{ case x o _ o z => Aop("+", x, z): AExp } ||
   109   (Te ~ p"+" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("+", x, z) } ||
   128   (Te o p"-" o AExp).map[AExp]{ case x o _ o z => Aop("-", x, z) } || Te
   110   (Te ~ p"-" ~ AExp).map[AExp]{ case x ~ _ ~ z => Aop("-", x, z) } || Te
   129 lazy val Te: Parser[String, AExp] = 
   111 lazy val Te: Parser[String, AExp] = 
   130   (Fa o p"*" o Te).map[AExp]{ case x o _ o z => Aop("*", x, z) } || 
   112   (Fa ~ p"*" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("*", x, z) } || 
   131   (Fa o p"/" o Te).map[AExp]{ case x o _ o z => Aop("/", x, z) } || Fa  
   113   (Fa ~ p"/" ~ Te).map[AExp]{ case x ~ _ ~ z => Aop("/", x, z) } || Fa  
   132 lazy val Fa: Parser[String, AExp] = 
   114 lazy val Fa: Parser[String, AExp] = 
   133    (p"(" o AExp o p")").map{ case _ o y o _ => y } || 
   115    (p"(" ~ AExp ~ p")").map{ case _ ~ y ~ _ => y } || 
   134    IdParser.map(Var(_)) || 
   116    IdParser.map(Var(_)) || 
   135    NumParser.map(Num(_))
   117    NumParser.map(Num(_))
   136 
   118 
   137 // boolean expressions with some simple nesting
   119 // boolean expressions with some simple nesting
   138 lazy val BExp: Parser[String, BExp] = 
   120 lazy val BExp: Parser[String, BExp] = 
   139    (AExp ~ p"==" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } || 
   121   (AExp ~ p"==" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("==", x, z) } || 
   140    (AExp ~ p"!=" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } || 
   122   (AExp ~ p"!=" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("!=", x, z) } || 
   141    (AExp ~ p"<" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } || 
   123   (AExp ~ p"<" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop("<", x, z) } || 
   142    (AExp ~ p">" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } ||
   124   (AExp ~ p">" ~ AExp).map[BExp]{ case x ~ _ ~ z => Bop(">", x, z) } ||
   143    (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } ||
   125   (p"(" ~ BExp ~ p")" ~ p"&&" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => And(y, v) } ||
   144    (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } ||
   126   (p"(" ~ BExp ~ p")" ~ p"||" ~ BExp).map[BExp]{ case _ ~ y ~ _ ~ _ ~ v => Or(y, v) } ||
   145    (p"true".map[BExp]{ _ => True }) || 
   127   (p"true".map[BExp]{ _ => True }) || 
   146    (p"false".map[BExp]{ _ => False }) ||
   128   (p"false".map[BExp]{ _ => False }) ||
   147    (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x }
   129   (p"(" ~ BExp ~ p")").map[BExp]{ case _ ~ x ~ _ => x }
   148 
   130 
   149 // a single statement 
   131 // Stmt: a single statement
       
   132 // Stmts: multiple statements
       
   133 // Block: blocks (enclosed in curly braces)
   150 lazy val Stmt: Parser[String, Stmt] =
   134 lazy val Stmt: Parser[String, Stmt] =
   151   ((p"skip".map[Stmt]{_ => Skip }) ||
   135   ((p"skip".map[Stmt]{_ => Skip }) ||
   152    (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
   136   (IdParser ~ p":=" ~ AExp).map[Stmt]{ case x ~ _ ~ z => Assign(x, z) } ||
   153    (p"write(" ~ IdParser ~ p")").map[Stmt]{ case _ ~ y ~ _ => Write(y) } ||
   137   (p"write(" ~ IdParser ~ p")").map[Stmt]{ case _ ~ y ~ _ => Write(y) } ||
   154    (p"if" ~ BExp ~ p"then" ~ Block ~ p"else" ~ Block)
   138   (p"if" ~ BExp ~ p"then" ~ Block ~ p"else" ~ Block)
   155      .map[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } ||
   139     .map[Stmt]{ case _ ~ y ~ _ ~ u ~ _ ~ w => If(y, u, w) } ||
   156    (p"while" ~ BExp ~ p"do" ~ Block).map[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) })   
   140   (p"while" ~ BExp ~ p"do" ~ Block).map[Stmt]{ case _ ~ y ~ _ ~ w => While(y, w) })  
   157  
       
   158  
       
   159 // statements
       
   160 lazy val Stmts: Parser[String, Block] =
   141 lazy val Stmts: Parser[String, Block] =
   161   (Stmt ~ p";" ~ Stmts).map[Block]{ case x ~ _ ~ z => x :: z } ||
   142   (Stmt ~ p";" ~ Stmts).map[Block]{ case x ~ _ ~ z => x :: z } ||
   162   (Stmt.map[Block]{ s => List(s) })
   143   (Stmt.map[Block]{ s => List(s) })
   163 
       
   164 // blocks (enclosed in curly braces)
       
   165 lazy val Block: Parser[String, Block] =
   144 lazy val Block: Parser[String, Block] =
   166   ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } || 
   145   ((p"{" ~ Stmts ~ p"}").map{ case _ ~ y ~ _ => y } || 
   167    (Stmt.map(s => List(s))))
   146   (Stmt.map(s => List(s))))
   168 
   147 
   169 
   148 
   170 // Examples
   149 // Examples
   171 println(AExp.parse_all("2*2*2"))
   150 println(AExp.parse_all("2*2*2"))
   172 println(BExp.parse_all("5+3"))
   151 println(BExp.parse_all("5+3"))
   192 
   171 
   193 
   172 
   194 // an interpreter for the WHILE language
   173 // an interpreter for the WHILE language
   195 type Env = Map[String, Int]
   174 type Env = Map[String, Int]
   196 
   175 
   197 def eval_aexp(a: AExp, env: Env) : Int = a match {
   176 def eval_aexp(a: AExp, env: Env) : Int =
   198   case Num(i) => i
   177   a match {
   199   case Var(s) => env(s)
   178     case Num(i) => i
   200   case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
   179     case Var(s) => env(s)
   201   case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
   180     case Aop("+", a1, a2) => eval_aexp(a1, env) + eval_aexp(a2, env)
   202   case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
   181     case Aop("-", a1, a2) => eval_aexp(a1, env) - eval_aexp(a2, env)
   203   case Aop("/", a1, a2) => eval_aexp(a1, env) / eval_aexp(a2, env)
   182     case Aop("*", a1, a2) => eval_aexp(a1, env) * eval_aexp(a2, env)
   204 }
   183     case Aop("/", a1, a2) => eval_aexp(a1, env) / eval_aexp(a2, env)
   205 
   184   }
   206 def eval_bexp(b: BExp, env: Env) : Boolean = b match {
   185 
   207   case True => true
   186 def eval_bexp(b: BExp, env: Env) : Boolean =
   208   case False => false
   187   b match {
   209   case Bop("==", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
   188     case True => true
   210   case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
   189     case False => false
   211   case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
   190     case Bop("==", a1, a2) => eval_aexp(a1, env) == eval_aexp(a2, env)
   212   case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
   191     case Bop("!=", a1, a2) => !(eval_aexp(a1, env) == eval_aexp(a2, env))
   213   case And(b1, b2) => eval_bexp(b1, env) && eval_bexp(b2, env)
   192     case Bop(">", a1, a2) => eval_aexp(a1, env) > eval_aexp(a2, env)
   214   case Or(b1, b2) => eval_bexp(b1, env) || eval_bexp(b2, env)
   193     case Bop("<", a1, a2) => eval_aexp(a1, env) < eval_aexp(a2, env)
   215 }
   194     case And(b1, b2) => eval_bexp(b1, env) && eval_bexp(b2, env)
   216 
   195     case Or(b1, b2) => eval_bexp(b1, env) || eval_bexp(b2, env)
   217 def eval_stmt(s: Stmt, env: Env) : Env = s match {
   196   }
   218   case Skip => env
   197 
   219   case Assign(x, a) => env + (x -> eval_aexp(a, env))
   198 def eval_stmt(s: Stmt, env: Env) : Env =
   220   case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) 
   199   s match {
   221   case While(b, bl) => 
   200     case Skip => env
   222     if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
   201     case Assign(x, a) => env + (x -> eval_aexp(a, env))
   223     else env
   202     case If(b, bl1, bl2) => if (eval_bexp(b, env)) eval_bl(bl1, env) else eval_bl(bl2, env) 
   224   case Write(x) => { println(env(x)) ; env }  
   203     case While(b, bl) => 
   225 }
   204       if (eval_bexp(b, env)) eval_stmt(While(b, bl), eval_bl(bl, env))
   226 
   205       else env
   227 def eval_bl(bl: Block, env: Env) : Env = bl match {
   206     case Write(x) => { println(env(x)) ; env }  
   228   case Nil => env
   207   }
   229   case s::bl => eval_bl(bl, eval_stmt(s, env))
   208 def eval_bl(bl: Block, env: Env) : Env =
   230 }
   209   bl match {
       
   210     case Nil => env
       
   211     case s::bl => eval_bl(bl, eval_stmt(s, env))
       
   212   }
   231 
   213 
   232 def eval(bl: Block) : Env = eval_bl(bl, Map())
   214 def eval(bl: Block) : Env = eval_bl(bl, Map())
   233 
   215 
   234 // parse + evaluate fib program; then lookup what is
   216 // parse + evaluate fib program; then lookup what is
   235 // stored under the variable "result" 
   217 // stored under the variable "result" 
   236 println(eval(Stmts.parse_all(fib).head)("result"))
   218 println(eval(Stmts.parse_all(fib).head)("result"))
   237 
   219 
   238 
   220 
   239 // more examles
   221 // more examples
   240 
   222 
   241 // calculate and print all factors bigger 
   223 // calculate and print all factors bigger 
   242 // than 1 and smaller than n
   224 // than 1 and smaller than n
   243 println("Factors")
   225 println("Factors")
   244 
   226