progs/parser-combinators/c.sc
changeset 806 0d3bc1d0d987
parent 805 526e10d97435
child 807 9fa90c73c6f3
equal deleted inserted replaced
805:526e10d97435 806:0d3bc1d0d987
     1 // Parser Combinators: Simple Version
       
     2 //====================================
       
     3 //
       
     4 // Call with
       
     5 //
       
     6 //  amm comb1.sc
       
     7 
       
     8  
       
     9 //  Note, in the lectures I did not show the implicit type constraint
       
    10 //  I : IsSeq, which means that the input type 'I' needs to be
       
    11 //  a sequence. 
       
    12 
       
    13 type IsSeq[A] = A => Seq[_]
       
    14 
       
    15 abstract class Parser[I : IsSeq, T]{
       
    16   def parse(in: I): Set[(T, I)]
       
    17 
       
    18   def parse_all(in: I) : Set[T] =
       
    19     for ((hd, tl) <- parse(in); 
       
    20         if tl.isEmpty) yield hd
       
    21 }
       
    22 
       
    23 // parser combinators
       
    24 
       
    25 // alternative parser
       
    26 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
       
    27                               q: => Parser[I, T]) extends Parser[I, T] {
       
    28   def parse(in: I) = p.parse(in) ++ q.parse(in)   
       
    29 }
       
    30 
       
    31 // sequence parser
       
    32 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    33                                  q: => Parser[I, S]) extends Parser[I, (T, S)] {
       
    34   def parse(in: I) = 
       
    35     for ((hd1, tl1) <- p.parse(in); 
       
    36          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
       
    37 }
       
    38 
       
    39 // map parser
       
    40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
       
    41                                  f: T => S) extends Parser[I, S] {
       
    42   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
       
    43 }
       
    44 
       
    45 
       
    46 
       
    47 // an example of an atomic parser for characters
       
    48 case class CharParser(c: Char) extends Parser[String, Char] {
       
    49   def parse(in: String) = 
       
    50     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
       
    51 }
       
    52 
       
    53 
       
    54 // an atomic parser for parsing strings according to a regex
       
    55 import scala.util.matching.Regex
       
    56 
       
    57 case class RegexParser(reg: Regex) extends Parser[String, String] {
       
    58   def parse(in: String) = reg.findPrefixMatchOf(in) match {
       
    59     case None => Set()
       
    60     case Some(m) => Set((m.matched, m.after.toString))  
       
    61   }
       
    62 }
       
    63 
       
    64 // atomic parsers for numbers and "verbatim" strings 
       
    65 val NumParser = RegexParser("[0-9]+".r)
       
    66 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
       
    67 
       
    68 
       
    69 
       
    70 // NumParserInt transforms a "string integer" into a propper Int
       
    71 // (needs "new" because MapParser is not a case class)
       
    72 
       
    73 val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)
       
    74 
       
    75 
       
    76 // the following string interpolation allows us to write 
       
    77 // StrParser(_some_string_) more conveniently as 
       
    78 //
       
    79 // p"<_some_string_>" 
       
    80 
       
    81 implicit def parser_interpolation(sc: StringContext) = new {
       
    82   def p(args: Any*) = StrParser(sc.s(args:_*))
       
    83 }
       
    84            
       
    85 
       
    86 // more convenient syntax for parser combinators
       
    87 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new {
       
    88   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
       
    89   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
       
    90   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
       
    91 }
       
    92 
       
    93 // these implicits allow us to use an infix notation for
       
    94 // sequences and alternatives; we also can write the usual
       
    95 // map for a MapParser
       
    96 
       
    97 
       
    98 // with this NumParserInt can now be written more conveniently
       
    99 // as:
       
   100 
       
   101 val NumParserInt2 = NumParser.map(_.toInt)
       
   102 
       
   103 
       
   104 // A parser for palindromes (just returns them as string)
       
   105 lazy val Pal : Parser[String, String] = {
       
   106   (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || 
       
   107   (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || 
       
   108   p"a" || p"b" || p""
       
   109 }  
       
   110 
       
   111 // examples
       
   112 Pal.parse_all("abaaaba")
       
   113 Pal.parse("abaaaba")
       
   114 
       
   115 println("Palindrome: " + Pal.parse_all("abaaaba"))
       
   116 
       
   117 // A parser for wellnested parentheses 
       
   118 //
       
   119 //   P ::= ( P ) P | epsilon
       
   120 //
       
   121 //   (transforms '(' -> '{' , ')' -> '}' )
       
   122 lazy val P : Parser[String, String] = {
       
   123   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
       
   124   p""
       
   125 }  
       
   126 
       
   127 println(P.parse_all("(((()()))())"))
       
   128 println(P.parse_all("(((()()))()))"))
       
   129 println(P.parse_all(")("))
       
   130 println(P.parse_all("()"))
       
   131 
       
   132 // A parser for arithmetic expressions (Terms and Factors)
       
   133 
       
   134 lazy val E: Parser[String, Int] = {
       
   135   (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } ||
       
   136   (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T }
       
   137 lazy val T: Parser[String, Int] = {
       
   138   (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F }
       
   139 lazy val F: Parser[String, Int] = {
       
   140   (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt }
       
   141 
       
   142 println(E.parse_all("1+3+4"))
       
   143 println(E.parse("1+3+4"))
       
   144 println(E.parse_all("4*2+3"))
       
   145 println(E.parse_all("4*(2+3)"))
       
   146 println(E.parse_all("(4)*((2+3))"))
       
   147 println(E.parse_all("4/2+3"))
       
   148 println(E.parse("1 + 2 * 3"))
       
   149 println(E.parse_all("(1+2)+3"))
       
   150 println(E.parse_all("1+2+3"))
       
   151 
       
   152 
       
   153 // with parser combinators (and other parsing algorithms)
       
   154 // no left-recursion is allowed, otherwise the will loop
       
   155 
       
   156 lazy val EL: Parser[String, Int] = 
       
   157   ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || 
       
   158    (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} ||
       
   159    (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} ||
       
   160    NumParserInt)
       
   161 
       
   162 // this will run forever:
       
   163 //println(EL.parse_all("1+2+3"))
       
   164 
       
   165 
       
   166 // non-ambiguous vs ambiguous grammars
       
   167 
       
   168 // ambiguous
       
   169 lazy val S : Parser[String, String] =
       
   170   (p"1" ~ S ~ S).map{ case ((x, y), z) => x + y + z } || p""
       
   171 
       
   172 //println(time(S.parse("1" * 10)))
       
   173 //println(time(S.parse_all("1" * 10)))
       
   174 
       
   175 // non-ambiguous
       
   176 lazy val U : Parser[String, String] =
       
   177   (p"1" ~ U).map{ case (x, y) => x + y } || p""
       
   178 
       
   179 //println(time(U.parse("1" * 10)))
       
   180 //println(time(U.parse_all("1" * 10)))
       
   181 println(U.parse("1" * 25))
       
   182 
       
   183 U.parse("11")
       
   184 U.parse("11111")
       
   185 U.parse("11011")
       
   186 
       
   187 U.parse_all("1" * 100)
       
   188 U.parse_all("1" * 100 + "0")
       
   189 
       
   190 // you can see the difference in second example
       
   191 //S.parse_all("1" * 100)         // succeeds
       
   192 //S.parse_all("1" * 100 + "0")   // fails
       
   193 
       
   194 
       
   195 // A variant which counts how many 1s are parsed
       
   196 lazy val UCount : Parser[String, Int] =
       
   197   (p"1" ~ UCount).map{ case (_, y) => y + 1 } || p"".map{ _ => 0 }
       
   198 
       
   199 println(UCount.parse("11111"))
       
   200 println(UCount.parse_all("11111"))
       
   201 
       
   202 // Two single character parsers
       
   203 lazy val One : Parser[String, String] = p"a"
       
   204 lazy val Two : Parser[String, String] = p"b"
       
   205 
       
   206 One.parse("a")
       
   207 One.parse("aaa")
       
   208 
       
   209 // note how the pairs nest to the left with sequence parsers
       
   210 (One ~ One).parse("aaa")
       
   211 (One ~ One ~ One).parse("aaa")
       
   212 (One ~ One ~ One ~ One).parse("aaaa")
       
   213 
       
   214 (One || Two).parse("aaa")
       
   215 
       
   216 
       
   217 
       
   218 // a problem with the arithmetic expression parser: it 
       
   219 // gets very slow with deeply nested parentheses
       
   220 
       
   221 println("Runtime problem")
       
   222 println(E.parse("1"))
       
   223 println(E.parse("(1)"))
       
   224 println(E.parse("((1))"))
       
   225 //println(E.parse("(((1)))"))
       
   226 //println(E.parse("((((1))))"))
       
   227 //println(E.parse("((((((1))))))"))
       
   228 //println(E.parse("(((((((1)))))))"))
       
   229 //println(E.parse("((((((((1)))))))"))