progs/parser-combinators/comb1.sc
changeset 799 85267be9a5ed
parent 742 b5b5583a3a08
child 801 7aab258bf72a
equal deleted inserted replaced
798:aaf0bd0a211d 799:85267be9a5ed
    19     for ((hd, tl) <- parse(in); 
    19     for ((hd, tl) <- parse(in); 
    20         if tl.isEmpty) yield hd
    20         if tl.isEmpty) yield hd
    21 }
    21 }
    22 
    22 
    23 // parser combinators
    23 // parser combinators
       
    24 
       
    25 // alternative parser
       
    26 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    27                               q: => Parser[I, T]) extends Parser[I, T] {
       
    28   def parse(in: I) = p.parse(in) ++ q.parse(in)   
       
    29 }
    24 
    30 
    25 // sequence parser
    31 // sequence parser
    26 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
    32 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
    27                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
    33                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
    28   def parse(in: I) = 
    34   def parse(in: I) = 
    29     for ((hd1, tl1) <- p.parse(in); 
    35     for ((hd1, tl1) <- p.parse(in); 
    30          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    36          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    31 }
    37 }
    32 
    38 
    33 // alternative parser
       
    34 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    35                               q: => Parser[I, T]) extends Parser[I, T] {
       
    36   def parse(in: I) = p.parse(in) ++ q.parse(in)   
       
    37 }
       
    38 
       
    39 // map parser
    39 // map parser
    40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
    40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
    41                                  f: T => S) extends Parser[I, S] {
    41                                  f: T => S) extends Parser[I, S] {
    42   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    42   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    43 }
    43 }
    47 // an example of an atomic parser for characters
    47 // an example of an atomic parser for characters
    48 case class CharParser(c: Char) extends Parser[String, Char] {
    48 case class CharParser(c: Char) extends Parser[String, Char] {
    49   def parse(in: String) = 
    49   def parse(in: String) = 
    50     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    50     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    51 }
    51 }
       
    52 
    52 
    53 
    53 // an atomic parser for parsing strings according to a regex
    54 // an atomic parser for parsing strings according to a regex
    54 import scala.util.matching.Regex
    55 import scala.util.matching.Regex
    55 
    56 
    56 case class RegexParser(reg: Regex) extends Parser[String, String] {
    57 case class RegexParser(reg: Regex) extends Parser[String, String] {
    63 // atomic parsers for numbers and "verbatim" strings 
    64 // atomic parsers for numbers and "verbatim" strings 
    64 val NumParser = RegexParser("[0-9]+".r)
    65 val NumParser = RegexParser("[0-9]+".r)
    65 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    66 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    66 
    67 
    67 
    68 
       
    69 
    68 // NumParserInt transforms a "string integer" into a propper Int
    70 // NumParserInt transforms a "string integer" into a propper Int
    69 // (needs "new" because MapParser is not a case class)
    71 // (needs "new" because MapParser is not a case class)
    70 
    72 
    71 val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)
    73 val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)
    72 
    74 
    77 // p"<_some_string_>" 
    79 // p"<_some_string_>" 
    78 
    80 
    79 implicit def parser_interpolation(sc: StringContext) = new {
    81 implicit def parser_interpolation(sc: StringContext) = new {
    80   def p(args: Any*) = StrParser(sc.s(args:_*))
    82   def p(args: Any*) = StrParser(sc.s(args:_*))
    81 }
    83 }
       
    84 
    82 
    85 
    83 // more convenient syntax for parser combinators
    86 // more convenient syntax for parser combinators
    84 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
    87 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
    85   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    88   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    86   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    89   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    93 
    96 
    94 
    97 
    95 // with this NumParserInt can now be written more conveniently
    98 // with this NumParserInt can now be written more conveniently
    96 // as:
    99 // as:
    97 
   100 
    98 val NumParserInt2 = NumParser.map(s => s.toInt)
   101 val NumParserInt2 = NumParser.map(_.toInt)
    99 
   102 
   100 
   103 
   101 // A parser for palindromes (just returns them as string)
   104 // A parser for palindromes (just returns them as string)
   102 lazy val Pal : Parser[String, String] = {
   105 lazy val Pal : Parser[String, String] = {
   103   (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || 
   106   (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || 
   109 Pal.parse_all("abaaaba")
   112 Pal.parse_all("abaaaba")
   110 Pal.parse("abaaaba")
   113 Pal.parse("abaaaba")
   111 
   114 
   112 println("Palindrome: " + Pal.parse_all("abaaaba"))
   115 println("Palindrome: " + Pal.parse_all("abaaaba"))
   113 
   116 
   114 // A parser for wellnested parentheses (transforms '(' -> '{' , ')' -> '}' )
   117 // A parser for wellnested parentheses 
   115 lazy val P : Parser[String, String] = 
   118 //
   116   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || p""
   119 //   P ::= ( P ) P | epsilon
       
   120 //
       
   121 //   (transforms '(' -> '{' , ')' -> '}' )
       
   122 lazy val P : Parser[String, String] = {
       
   123   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
       
   124   p""
       
   125 }  
   117 
   126 
   118 println(P.parse_all("(((()()))())"))
   127 println(P.parse_all("(((()()))())"))
   119 println(P.parse_all("(((()()))()))"))
   128 println(P.parse_all("(((()()))()))"))
   120 println(P.parse_all(")("))
   129 println(P.parse_all(")("))
   121 println(P.parse_all("()"))
   130 println(P.parse_all("()"))
   122 
   131 
   123 // A parser for arithmetic expressions (Terms and Factors)
   132 // A parser for arithmetic expressions (Terms and Factors)
   124 
   133 
   125 lazy val E: Parser[String, Int] = 
   134 lazy val E: Parser[String, Int] = {
   126   (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } ||
   135   (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } ||
   127   (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T 
   136   (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T }
   128 lazy val T: Parser[String, Int] = 
   137 lazy val T: Parser[String, Int] = {
   129   (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F
   138   (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F }
   130 lazy val F: Parser[String, Int] = 
   139 lazy val F: Parser[String, Int] = {
   131   (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt
   140   (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt }
   132 
       
   133 /* same parser but producing a string
       
   134 lazy val E: Parser[String, String] = 
       
   135   (T ~ "+" ~ E).map{ case x ~ y ~ z => "(" + x + ")+(" + z + ")"} || T 
       
   136 lazy val T: Parser[String, String] = 
       
   137   (F ~ "*" ~ T).map{ case x ~ y ~ z => "(" + x + ")*("+ z + ")"} || F
       
   138 lazy val F: Parser[String, String] = 
       
   139   ("(" ~ E ~ ")").map{ case x ~ y ~ z => y } || NumParser
       
   140 */
       
   141 
   141 
   142 println(E.parse_all("1+3+4"))
   142 println(E.parse_all("1+3+4"))
   143 println(E.parse("1+3+4"))
   143 println(E.parse("1+3+4"))
   144 println(E.parse_all("4*2+3"))
   144 println(E.parse_all("4*2+3"))
   145 println(E.parse_all("4*(2+3)"))
   145 println(E.parse_all("4*(2+3)"))
   146 println(E.parse_all("(4)*((2+3))"))
   146 println(E.parse_all("(4)*((2+3))"))
   147 println(E.parse_all("4/2+3"))
   147 println(E.parse_all("4/2+3"))
   148 println(E.parse("1 + 2 * 3"))
   148 println(E.parse("1 + 2 * 3"))
   149 println(E.parse_all("(1+2)+3"))
   149 println(E.parse_all("(1+2)+3"))
   150 println(E.parse_all("1+2+3"))  
   150 println(E.parse_all("1+2+3"))
   151 
   151 
   152 
   152 
   153 // with parser combinators (and other parsing algorithms)
   153 // with parser combinators (and other parsing algorithms)
   154 // no left-recursion is allowed, otherwise the will loop
   154 // no left-recursion is allowed, otherwise the will loop
   155 
   155 
   192 //S.parse_all("1" * 100 + "0")   // fails
   192 //S.parse_all("1" * 100 + "0")   // fails
   193 
   193 
   194 
   194 
   195 // A variant which counts how many 1s are parsed
   195 // A variant which counts how many 1s are parsed
   196 lazy val UCount : Parser[String, Int] =
   196 lazy val UCount : Parser[String, Int] =
   197   (p"1" ~ UCount).map[Int]{ case (_, y) => y + 1 } || p"".map[Int]{ _ => 0 }
   197   (p"1" ~ UCount).map{ case (_, y) => y + 1 } || p"".map{ _ => 0 }
   198 
   198 
   199 println(UCount.parse("11111"))
   199 println(UCount.parse("11111"))
   200 println(UCount.parse_all("11111"))
   200 println(UCount.parse_all("11111"))
   201 
   201 
   202 // Two single character parsers
   202 // Two single character parsers