progs/parser-combinators/comb1.sc
changeset 906 2bf1516d730f
parent 898 45a48c47dcca
child 919 53f08d873e09
equal deleted inserted replaced
905:15973df32613 906:2bf1516d730f
     4 // Call with
     4 // Call with
     5 //
     5 //
     6 //  amm comb1.sc
     6 //  amm comb1.sc
     7 
     7 
     8  
     8  
     9 //  Note, in the lectures I did not show the implicit type constraint
     9 //  Note, in the lectures I did not show the type bound
    10 //  I : IsSeq, which means that the input type 'I' needs to be
    10 //  using is: I => Seq[_], which means that the input 
    11 //  a sequence. 
    11 //  type 'I' needs to be a sequence. 
    12 
    12 
    13 type IsSeq[A] = A => Seq[_]
    13 abstract class Parser[I, T](using is: I => Seq[_])  {
    14 
       
    15 abstract class Parser[I : IsSeq, T]{
       
    16   def parse(in: I): Set[(T, I)]  
    14   def parse(in: I): Set[(T, I)]  
    17 
    15 
    18   def parse_all(in: I) : Set[T] =
    16   def parse_all(in: I) : Set[T] =
    19     for ((hd, tl) <- parse(in); 
    17     for ((hd, tl) <- parse(in); 
    20         if tl.isEmpty) yield hd
    18         if is(tl).isEmpty) yield hd
    21 }
    19 }
    22 
    20 
    23 // parser combinators
    21 // parser combinators
    24 
    22 
    25 // alternative parser
    23 // alternative parser
    26 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
    24 class AltParser[I, T](p: => Parser[I, T], 
    27                               q: => Parser[I, T]) extends Parser[I, T] {
    25                       q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] {
    28   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    26   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    29 }
    27 }
    30 
    28 
    31 // sequence parser
    29 // sequence parser
    32 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
    30 class SeqParser[I, T, S](p: => Parser[I, T], 
    33                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
    31                          q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, (T, S)] {
    34   def parse(in: I) = 
    32   def parse(in: I) = 
    35     for ((hd1, tl1) <- p.parse(in); 
    33     for ((hd1, tl1) <- p.parse(in); 
    36          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    34          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    37 }
    35 }
    38 
    36 
    39 // map parser
    37 // map parser
    40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
    38 class MapParser[I, T, S](p: => Parser[I, T], 
    41                                  f: T => S) extends Parser[I, S] {
    39                          f: T => S)(using I => Seq[_]) extends Parser[I, S] {
    42   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    40   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    43 }
    41 }
    44 
    42 
    45 
    43 
    46 
    44 
    48 case class CharParser(c: Char) extends Parser[String, Char] {
    46 case class CharParser(c: Char) extends Parser[String, Char] {
    49   def parse(in: String) = 
    47   def parse(in: String) = 
    50     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    48     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    51 }
    49 }
    52 
    50 
    53 CharParser('c').parse("abc")
    51 CharParser('a').parse("abc")
    54 
    52 
    55 // an atomic parser for parsing strings according to a regex
    53 // an atomic parser for parsing strings according to a regex
    56 import scala.util.matching.Regex
    54 import scala.util.matching.Regex
    57 
    55 
    58 case class RegexParser(reg: Regex) extends Parser[String, String] {
    56 case class RegexParser(reg: Regex) extends Parser[String, String] {
    65 // atomic parsers for numbers and "verbatim" strings 
    63 // atomic parsers for numbers and "verbatim" strings 
    66 val NumParser = RegexParser("[0-9]+".r)
    64 val NumParser = RegexParser("[0-9]+".r)
    67 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    65 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    68 
    66 
    69 NumParser.parse("a123a123bc") 
    67 NumParser.parse("a123a123bc") 
    70 StrParser("else").parse("eelsethen")
    68 StrParser("else").parse("elsethen")
    71 
    69 
    72 
    70 
    73 // NumParserInt transforms a "string integer" into a propper Int
    71 // NumParserInt transforms a "string integer" into a propper Int
    74 // (needs "new" because MapParser is not a case class)
    72 // (needs "new" because MapParser is not a case class)
    75 
    73 
    79 // the following string interpolation allows us to write 
    77 // the following string interpolation allows us to write 
    80 // StrParser(_some_string_) more conveniently as 
    78 // StrParser(_some_string_) more conveniently as 
    81 //
    79 //
    82 // p"<_some_string_>" 
    80 // p"<_some_string_>" 
    83 
    81 
    84 
    82 extension (sc: StringContext) 
    85 implicit def parser_interpolation(sc: StringContext) = new {
       
    86   def p(args: Any*) = StrParser(sc.s(args:_*))
    83   def p(args: Any*) = StrParser(sc.s(args:_*))
    87 }
    84 
    88 
    85 
    89 (p"else").parse("elsethen")           
    86 (p"else").parse("elsethen")           
    90 
    87 
    91 // more convenient syntax for parser combinators
    88 // more convenient syntax for parser combinators
    92 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
    89 extension [I, T](p: Parser[I, T])(using I => Seq[_]) {
    93   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    90   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    94   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    91   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    95   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
    92   def mapp[S](f: => T => S) = new MapParser[I, T, S](p, f)
    96 }
    93 }
    97 
    94 
    98 def toU(s: String) = s.map(_.toUpper)
    95 def toU(s: String) = s.map(_.toUpper)
    99 
    96 
   100 (p"ELSE").map(toU(_)).parse("ELSEifthen")  
    97 (p"ELSE").mapp(toU(_)).parse("ELSEifthen")  
   101 
    98 
   102 // these implicits allow us to use an infix notation for
    99 // these implicits allow us to use an infix notation for
   103 // sequences and alternatives; we also can write the usual
   100 // sequences and alternatives; we also can write the usual
   104 // map for a MapParser
   101 // map for a MapParser
   105 
   102 
   108 // as:
   105 // as:
   109 
   106 
   110 val NumParserInt2 = NumParser.map(_.toInt)
   107 val NumParserInt2 = NumParser.map(_.toInt)
   111 
   108 
   112 
   109 
   113 
       
   114 
       
   115 // A parser for palindromes (just returns them as string)
   110 // A parser for palindromes (just returns them as string)
   116 lazy val Pal : Parser[String, String] = {
   111 lazy val Pal : Parser[String, String] = {
   117    (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || 
   112    (p"a" ~ Pal ~ p"a").mapp{ case ((x, y), z) => s"$x$y$z" } || 
   118    (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || 
   113    (p"b" ~ Pal ~ p"b").mapp{ case ((x, y), z) => s"$x$y$z" } || 
   119     p"a" || p"b" || p""
   114     p"a" || p"b" || p""
   120 }  
   115 }  
   121 
   116 
   122 // examples
   117 // examples
   123 Pal.parse_all("abaaaba")
   118 Pal.parse_all("abaaaba")
   129 //
   124 //
   130 //   P ::= ( P ) P | epsilon
   125 //   P ::= ( P ) P | epsilon
   131 //
   126 //
   132 //   (transforms '(' -> '{' , ')' -> '}' )
   127 //   (transforms '(' -> '{' , ')' -> '}' )
   133 lazy val P : Parser[String, String] = {
   128 lazy val P : Parser[String, String] = {
   134   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
   129   (p"(" ~ P ~ p")" ~ P).mapp{ case (((_, x), _), y) => "{" + x + "}" + y } ||
   135   p""
   130   p""
   136 }  
   131 }  
   137 
   132 
   138 println(P.parse_all("(((()()))())"))
   133 println(P.parse_all("(((()()))())"))
   139 println(P.parse_all("(((()()))()))"))
   134 println(P.parse_all("(((()()))()))"))
   231 
   226 
   232 println("Runtime problem")
   227 println("Runtime problem")
   233 println(E.parse("1"))
   228 println(E.parse("1"))
   234 println(E.parse("(1)"))
   229 println(E.parse("(1)"))
   235 println(E.parse("((1))"))
   230 println(E.parse("((1))"))
   236 //println(E.parse("(((1)))"))
   231 println(E.parse("(((1)))"))
   237 //println(E.parse("((((1))))"))
   232 println(E.parse("((((1))))"))
   238 //println(E.parse("((((((1))))))"))
   233 //println(E.parse("((((((1))))))"))
   239 //println(E.parse("(((((((1)))))))"))
   234 //println(E.parse("(((((((1)))))))"))
   240 //println(E.parse("((((((((1)))))))"))
   235 //println(E.parse("((((((((1))))))))"))
   241 
   236 
   242 
   237 
   243 
   238 // faster because of merge
   244 
   239 lazy val E2: Parser[String, Int] = {
   245 // runs with amm2 and amm3
   240   (T2 ~ (p"+" || p"-") ~ E2).mapp[Int]{ case ((x, y), z) => if (y == "+") x + z else x - z} || T2 }
       
   241 lazy val T2: Parser[String, Int] = {
       
   242   (F2 ~ p"*" ~ T2).mapp[Int]{ case ((x, _), z) => x * z } || F2 }
       
   243 lazy val F2: Parser[String, Int] = {
       
   244   (p"(" ~ E2 ~ p")").mapp[Int]{ case ((_, y), _) => y } || NumParserInt }
       
   245 
       
   246 
       
   247 println("Runtime problem")
       
   248 println(E2.parse("1"))
       
   249 println(E2.parse("(1)"))
       
   250 println(E2.parse("((1))"))
       
   251 println(E2.parse("(((1)))"))
       
   252 println(E2.parse("((((1))))"))
       
   253 //println(E2.parse("((((((1))))))"))
       
   254 //println(E2.parse("(((((((1)))))))"))
       
   255 //println(E2.parse("((((((((1))))))))"))