| 
     1 // Parser Combinators: Simple Version  | 
         | 
     2 //====================================  | 
         | 
     3 //  | 
         | 
     4 // Call with Ammonite (Scala 2.13.10)  | 
         | 
     5 //  | 
         | 
     6 //  amm comb1-2.sc  | 
         | 
     7   | 
         | 
     8    | 
         | 
     9 //  Note, in the lectures I did not show the implicit type bound  | 
         | 
    10 //  I : IsSeq, which means that the input type 'I' needs to be  | 
         | 
    11 //  a sequence.   | 
         | 
    12   | 
         | 
    13 type IsSeq[A] = A => Seq[_]  | 
         | 
    14   | 
         | 
    15 abstract class Parser[I : IsSeq, T]{ | 
         | 
    16   def parse(in: I): Set[(T, I)]    | 
         | 
    17   | 
         | 
    18   def parse_all(in: I) : Set[T] =  | 
         | 
    19     for ((hd, tl) <- parse(in);   | 
         | 
    20         if tl.isEmpty) yield hd  | 
         | 
    21 }  | 
         | 
    22   | 
         | 
    23 // parser combinators  | 
         | 
    24   | 
         | 
    25 // alternative parser  | 
         | 
    26 class AltParser[I : IsSeq, T](p: => Parser[I, T],   | 
         | 
    27                               q: => Parser[I, T]) extends Parser[I, T] { | 
         | 
    28   def parse(in: I) = p.parse(in) ++ q.parse(in)     | 
         | 
    29 }  | 
         | 
    30   | 
         | 
    31 // sequence parser  | 
         | 
    32 class SeqParser[I : IsSeq, T, S](p: => Parser[I, T],   | 
         | 
    33                                  q: => Parser[I, S]) extends Parser[I, (T, S)] { | 
         | 
    34   def parse(in: I) =   | 
         | 
    35     for ((hd1, tl1) <- p.parse(in);   | 
         | 
    36          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)  | 
         | 
    37 }  | 
         | 
    38   | 
         | 
    39 // map parser  | 
         | 
    40 class MapParser[I : IsSeq, T, S](p: => Parser[I, T],   | 
         | 
    41                                  f: T => S) extends Parser[I, S] { | 
         | 
    42   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)  | 
         | 
    43 }  | 
         | 
    44   | 
         | 
    45   | 
         | 
    46   | 
         | 
    47 // an example of an atomic parser for characters  | 
         | 
    48 case class CharParser(c: Char) extends Parser[String, Char] { | 
         | 
    49   def parse(in: String) =   | 
         | 
    50     if (in != "" && in.head == c) Set((c, in.tail)) else Set()  | 
         | 
    51 }  | 
         | 
    52   | 
         | 
    53 CharParser('c').parse("abc") | 
         | 
    54   | 
         | 
    55 // an atomic parser for parsing strings according to a regex  | 
         | 
    56 import scala.util.matching.Regex  | 
         | 
    57   | 
         | 
    58 case class RegexParser(reg: Regex) extends Parser[String, String] { | 
         | 
    59   def parse(in: String) = reg.findPrefixMatchOf(in) match { | 
         | 
    60     case None => Set()  | 
         | 
    61     case Some(m) => Set((m.matched, m.after.toString))    | 
         | 
    62   }  | 
         | 
    63 }  | 
         | 
    64   | 
         | 
    65 // atomic parsers for numbers and "verbatim" strings   | 
         | 
    66 val NumParser = RegexParser("[0-9]+".r) | 
         | 
    67 def StrParser(s: String) = RegexParser(Regex.quote(s).r)  | 
         | 
    68   | 
         | 
    69 NumParser.parse("a123a123bc")  | 
         | 
    70 StrParser("else").parse("eelsethen") | 
         | 
    71   | 
         | 
    72   | 
         | 
    73 // NumParserInt transforms a "string integer" into a proper Int  | 
         | 
    74 // (needs "new" because MapParser is not a case class)  | 
         | 
    75   | 
         | 
    76 val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)  | 
         | 
    77 NumParserInt.parse("123abc") | 
         | 
    78   | 
         | 
    79 // the following string interpolation allows us to write   | 
         | 
    80 // StrParser(_some_string_) more conveniently as   | 
         | 
    81 //  | 
         | 
    82 // p"<_some_string_>"   | 
         | 
    83   | 
         | 
    84   | 
         | 
    85 implicit def parser_interpolation(sc: StringContext) = new { | 
         | 
    86   def p(args: Any*) = StrParser(sc.s(args:_*))  | 
         | 
    87 }  | 
         | 
    88   | 
         | 
    89 (p"else").parse("elsethen")            | 
         | 
    90   | 
         | 
    91 // more convenient syntax for parser combinators  | 
         | 
    92   | 
         | 
    93 implicit def ParserOps[I : IsSeq, T](p: Parser[I, T]) = new { | 
         | 
    94   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)  | 
         | 
    95   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)  | 
         | 
    96   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)  | 
         | 
    97 }  | 
         | 
    98   | 
         | 
    99 // example  | 
         | 
   100 def toU(s: String) = s.map(_.toUpper)  | 
         | 
   101 (p"ELSE").map(toU(_)).parse("ELSEifthen")   | 
         | 
   102   | 
         | 
   103 // these implicits allow us to use an infix notation for  | 
         | 
   104 // sequences and alternatives; we also can write the usual  | 
         | 
   105 // map for a MapParser  | 
         | 
   106   | 
         | 
   107   | 
         | 
   108 // with this NumParserInt can now be written more conveniently  | 
         | 
   109 // as:  | 
         | 
   110   | 
         | 
   111 val NumParserInt2 = NumParser.map(_.toInt)  | 
         | 
   112   | 
         | 
   113   | 
         | 
   114   | 
         | 
   115 // A parser for palindromes (just returns them as string)  | 
         | 
   116 lazy val Pal : Parser[String, String] = { | 
         | 
   117    (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } ||  | 
         | 
   118    (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } ||  | 
         | 
   119     p"a" || p"b" || p""  | 
         | 
   120 }    | 
         | 
   121   | 
         | 
   122 // examples  | 
         | 
   123 Pal.parse_all("abaaaba") | 
         | 
   124 Pal.parse("abaaaba") | 
         | 
   125   | 
         | 
   126 println("Palindrome: " + Pal.parse_all("abaaaba")) | 
         | 
   127   | 
         | 
   128 // A parser for wellnested parentheses   | 
         | 
   129 //  | 
         | 
   130 //   P ::= ( P ) P | epsilon  | 
         | 
   131 //  | 
         | 
   132 //   (transforms '(' -> '{' , ')' -> '}' ) | 
         | 
   133 lazy val P : Parser[String, String] = { | 
         | 
   134   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || | 
         | 
   135   p""  | 
         | 
   136 }    | 
         | 
   137   | 
         | 
   138 println(P.parse_all("(((()()))())")) | 
         | 
   139 println(P.parse_all("(((()()))()))")) | 
         | 
   140 println(P.parse_all(")(")) | 
         | 
   141 println(P.parse_all("()")) | 
         | 
   142   | 
         | 
   143 // A parser for arithmetic expressions (Terms and Factors)  | 
         | 
   144   | 
         | 
   145 lazy val E: Parser[String, Int] = { | 
         | 
   146   (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } || | 
         | 
   147   (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T } | 
         | 
   148 lazy val T: Parser[String, Int] = { | 
         | 
   149   (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F } | 
         | 
   150 lazy val F: Parser[String, Int] = { | 
         | 
   151   (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt } | 
         | 
   152   | 
         | 
   153 println(E.parse_all("1+3+4")) | 
         | 
   154 println(E.parse("1+3+4")) | 
         | 
   155 println(E.parse_all("4*2+3")) | 
         | 
   156 println(E.parse_all("4*(2+3)")) | 
         | 
   157 println(E.parse_all("(4)*((2+3))")) | 
         | 
   158 println(E.parse_all("4/2+3")) | 
         | 
   159 println(E.parse("1 + 2 * 3")) | 
         | 
   160 println(E.parse_all("(1+2)+3")) | 
         | 
   161 println(E.parse_all("1+2+3")) | 
         | 
   162   | 
         | 
   163   | 
         | 
   164 // with parser combinators (and other parsing algorithms)  | 
         | 
   165 // no left-recursion is allowed, otherwise they will loop  | 
         | 
   166   | 
         | 
   167 lazy val EL: Parser[String, Int] =   | 
         | 
   168   ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} ||  | 
         | 
   169    (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} || | 
         | 
   170    (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} || | 
         | 
   171    NumParserInt)  | 
         | 
   172   | 
         | 
   173 // this will run forever:  | 
         | 
   174 //println(EL.parse_all("1+2+3")) | 
         | 
   175   | 
         | 
   176   | 
         | 
   177 // non-ambiguous vs ambiguous grammars  | 
         | 
   178   | 
         | 
   179 // ambiguous  | 
         | 
   180 lazy val S : Parser[String, String] =  | 
         | 
   181   (p"1" ~ S ~ S).map{ case ((x, y), z) => x + y + z } || p"" | 
         | 
   182   | 
         | 
   183 //println(time(S.parse("1" * 10))) | 
         | 
   184 //println(time(S.parse_all("1" * 10))) | 
         | 
   185   | 
         | 
   186 // non-ambiguous  | 
         | 
   187 lazy val U : Parser[String, String] =  | 
         | 
   188   (p"1" ~ U).map{ case (x, y) => x + y } || p"" | 
         | 
   189   | 
         | 
   190 //println(time(U.parse("1" * 10))) | 
         | 
   191 //println(time(U.parse_all("1" * 10))) | 
         | 
   192 println(U.parse("1" * 25)) | 
         | 
   193   | 
         | 
   194 U.parse("11") | 
         | 
   195 U.parse("11111") | 
         | 
   196 U.parse("11011") | 
         | 
   197   | 
         | 
   198 U.parse_all("1" * 100) | 
         | 
   199 U.parse_all("1" * 100 + "0") | 
         | 
   200   | 
         | 
   201 // you can see the difference in second example  | 
         | 
   202 //S.parse_all("1" * 100)         // succeeds | 
         | 
   203 //S.parse_all("1" * 100 + "0")   // fails | 
         | 
   204   | 
         | 
   205   | 
         | 
   206 // A variant which counts how many 1s are parsed  | 
         | 
   207 lazy val UCount : Parser[String, Int] =  | 
         | 
   208   (p"1" ~ UCount).map{ case (_, y) => y + 1 } || p"".map{ _ => 0 } | 
         | 
   209   | 
         | 
   210 println(UCount.parse("11111")) | 
         | 
   211 println(UCount.parse_all("11111")) | 
         | 
   212   | 
         | 
   213 // Two single character parsers  | 
         | 
   214 lazy val One : Parser[String, String] = p"a"  | 
         | 
   215 lazy val Two : Parser[String, String] = p"b"  | 
         | 
   216   | 
         | 
   217 One.parse("a") | 
         | 
   218 One.parse("aaa") | 
         | 
   219   | 
         | 
   220 // note how the pairs nest to the left with sequence parsers  | 
         | 
   221 (One ~ One).parse("aaa") | 
         | 
   222 (One ~ One ~ One).parse("aaa") | 
         | 
   223 (One ~ One ~ One ~ One).parse("aaaa") | 
         | 
   224   | 
         | 
   225 (One || Two).parse("aaa") | 
         | 
   226   | 
         | 
   227   | 
         | 
   228   | 
         | 
   229 // a problem with the arithmetic expression parser: it   | 
         | 
   230 // gets very slow with deeply nested parentheses  | 
         | 
   231   | 
         | 
   232 println("Runtime problem") | 
         | 
   233 println(E.parse("1")) | 
         | 
   234 println(E.parse("(1)")) | 
         | 
   235 println(E.parse("((1))")) | 
         | 
   236 //println(E.parse("(((1)))")) | 
         | 
   237 //println(E.parse("((((1))))")) | 
         | 
   238 //println(E.parse("((((((1))))))")) | 
         | 
   239 //println(E.parse("(((((((1)))))))")) | 
         | 
   240 //println(E.parse("((((((((1)))))))")) | 
         | 
   241   |