progs/parser-combinators/comb1.sc
changeset 960 c7009356ddd8
parent 959 64ec1884d860
child 961 c0600f8b6427
equal deleted inserted replaced
959:64ec1884d860 960:c7009356ddd8
     3 //
     3 //
     4 // Call with
     4 // Call with
     5 //
     5 //
     6 //  amm comb1.sc
     6 //  amm comb1.sc
     7 
     7 
     8  
     8 
     9 //  Note, in the lectures I did not show the type bound
     9 //  Note, in the lectures I did not show the type bound
    10 //  using is: I => Seq[_], which means that the input 
    10 //  using is: I => Seq[_], which means that the input
    11 //  type 'I' needs to be a sequence. 
    11 //  type 'I' needs to be a sequence.
    12 
    12 
    13 type IsSeq[I] = I => Seq[_]
    13 type IsSeq[I] = I => Seq[?]
    14 
    14 
    15 abstract class Parser[I, T](using is: IsSeq[I])  {
    15 abstract class Parser[I: IsSeq, T](using is: IsSeq[I])  {
    16   def parse(in: I): Set[(T, I)]  
    16   def parse(in: I): Set[(T, I)]
    17 
    17 
    18   def parse_all(in: I) : Set[T] =
    18   def parse_all(in: I) : Set[T] =
    19     for ((hd, tl) <- parse(in); 
    19     for ((hd, tl) <- parse(in);
    20         if is(tl).isEmpty) yield hd
    20         if is(tl).isEmpty) yield hd
    21 }
    21 }
    22 
    22 
    23 
    23 
    24 // parser combinators
    24 // parser combinators
    25 
    25 
    26 
    26 
    27 // alternative parser
    27 // alternative parser
    28 class AltParser[I : IsSeq, T](p: => Parser[I, T], 
    28 class AltParser[I : IsSeq, T](p: => Parser[I, T],
    29                               q: => Parser[I, T]) extends Parser[I, T] {
    29                               q: => Parser[I, T]) extends Parser[I, T] {
    30   def parse(in: I) = p.parse(in) ++ q.parse(in)   
    30   def parse(in: I) = p.parse(in) ++ q.parse(in)
    31 }
    31 }
    32 
    32 
    33 // sequence parser
    33 // sequence parser
    34 class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], 
    34 class SeqParser[I: IsSeq, T, S](p: => Parser[I, T],
    35                                 q: => Parser[I, S]) extends Parser[I, (T, S)] {
    35                                 q: => Parser[I, S]) extends Parser[I, (T, S)] {
    36   def parse(in: I) = 
    36   def parse(in: I) =
    37     for ((hd1, tl1) <- p.parse(in); 
    37     for ((hd1, tl1) <- p.parse(in);
    38          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    38          (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2)
    39 }
    39 }
    40 
    40 
    41 // map parser
    41 // map parser
    42 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
    42 class MapParser[I : IsSeq, T, S](p: => Parser[I, T],
    43                                  f: T => S) extends Parser[I, S] {
    43                                  f: T => S) extends Parser[I, S] {
    44   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    44   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
    45 }
    45 }
    46 
    46 
    47 
    47 
    48 
    48 
    49 // an example of an atomic parser for characters
    49 // an example of an atomic parser for characters
    50 case class CharParser(c: Char) extends Parser[String, Char] {
    50 case class CharParser(c: Char) extends Parser[String, Char] {
    51   def parse(in: String) = 
    51   def parse(in: String) =
    52     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    52     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
    53 }
    53 }
    54 
    54 
    55 val ap = CharParser('a')
    55 val ap = CharParser('a')
    56 val bp = CharParser('b')
    56 val bp = CharParser('b')
    62 import scala.util.matching.Regex
    62 import scala.util.matching.Regex
    63 
    63 
    64 case class RegexParser(reg: Regex) extends Parser[String, String] {
    64 case class RegexParser(reg: Regex) extends Parser[String, String] {
    65   def parse(in: String) = reg.findPrefixMatchOf(in) match {
    65   def parse(in: String) = reg.findPrefixMatchOf(in) match {
    66     case None => Set()
    66     case None => Set()
    67     case Some(m) => Set((m.matched, m.after.toString))  
    67     case Some(m) => Set((m.matched, m.after.toString))
    68   }
    68   }
    69 }
    69 }
    70 
    70 
    71 // atomic parsers for numbers and "verbatim" strings 
    71 // atomic parsers for numbers and "verbatim" strings
    72 val NumParser = RegexParser("[0-9]+".r)
    72 val NumParser = RegexParser("[0-9]+".r)
    73 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    73 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
    74 
    74 
    75 NumParser.parse("123a123bc") 
    75 NumParser.parse("123a123bc")
    76 StrParser("else").parse("elsethen")
    76 StrParser("else").parse("elsethen")
    77 
    77 
    78 
    78 
    79 // NumParserInt transforms a "string integer" into a propper Int
    79 // NumParserInt transforms a "string integer" into a propper Int
    80 // (needs "new" because MapParser is not a case class)
    80 // (needs "new" because MapParser is not a case class)
    81 
    81 
    82 val NumParserInt = MapParser(NumParser, (s: String) => s.toInt)
    82 val NumParserInt = MapParser(NumParser, (s: String) => s.toInt)
    83 NumParserInt.parse("123abc")
    83 NumParserInt.parse("123abc")
    84 
    84 
    85 // the following string interpolation allows us to write 
    85 // the following string interpolation allows us to write
    86 // StrParser(_some_string_) more conveniently as 
    86 // StrParser(_some_string_) more conveniently as
    87 //
    87 //
    88 // p"<_some_string_>" 
    88 // p"<_some_string_>"
    89 
    89 
    90 extension (sc: StringContext) 
    90 extension (sc: StringContext)
    91   def p(args: Any*) = StrParser(sc.s(args:_*))
    91   def p(args: Any*) = StrParser(sc.s(args*))
    92 
    92 
    93 
    93 
    94 (p"else").parse("elsethen")           
    94 (p"else").parse("elsethen")
    95 
    95 
    96 // more convenient syntax for parser combinators
    96 // more convenient syntax for parser combinators
    97 extension [I: IsSeq, T](p: Parser[I, T]) {
    97 extension [I: IsSeq, T](p: Parser[I, T]) {
    98   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    98   def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q)
    99   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
    99   def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q)
   100   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
   100   def map[S](f: => T => S) = new MapParser[I, T, S](p, f)
   101 }
   101 }
   102 
   102 
   103 // simple example of transforming the 
   103 // simple example of transforming the
   104 // result into capital letters
   104 // result into capital letters
   105 def toU(s: String) = s.map(_.toUpper)
   105 def toU(s: String) = s.map(_.toUpper)
   106 
   106 
   107 (p"else").map(toU(_)).parse("elseifthen")  
   107 (p"else").map(toU(_)).parse("elseifthen")
   108 
   108 
   109 // these implicits allow us to use an infix notation for
   109 // these implicits allow us to use an infix notation for
   110 // sequences and alternatives; we also can write the usual
   110 // sequences and alternatives; we also can write the usual
   111 // map for a MapParser
   111 // map for a MapParser
   112 
   112 
   119 val x = 1 + 3
   119 val x = 1 + 3
   120 
   120 
   121 // A parser for palindromes (just returns them as string)
   121 // A parser for palindromes (just returns them as string)
   122 //  since the parser is recursive it needs to be lazy
   122 //  since the parser is recursive it needs to be lazy
   123 lazy val Pal : Parser[String, String] = {
   123 lazy val Pal : Parser[String, String] = {
   124    (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || 
   124    (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } ||
   125    (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || 
   125    (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } ||
   126     p"a" || p"b" || p""
   126     p"a" || p"b" || p""
   127 }  
   127 }
   128 
   128 
   129 // examples
   129 // examples
   130 Pal.parse_all("abacaba")
   130 Pal.parse_all("abacaba")
   131 Pal.parse("abacaaba")
   131 Pal.parse("abacaaba")
   132 
   132 
   133 println("Palindrome: " + Pal.parse_all("abaaaba"))
   133 println("Palindrome: " + Pal.parse_all("abaaaba"))
   134 
   134 
   135 // A parser for wellnested parentheses 
   135 // A parser for wellnested parentheses
   136 //
   136 //
   137 //   P ::= ( P ) P | epsilon
   137 //   P ::= ( P ) P | epsilon
   138 //
   138 //
   139 //   (transforms '(' -> '{' , ')' -> '}' )
   139 //   (transforms '(' -> '{' , ')' -> '}' )
   140 lazy val P : Parser[String, String] = {
   140 lazy val P : Parser[String, String] = {
   141   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
   141   (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } ||
   142   p""
   142   p""
   143 }  
   143 }
   144 
   144 
   145 println(P.parse_all("(((()()))())"))
   145 println(P.parse_all("(((()()))())"))
   146 println(P.parse_all("(((()()))()))"))
   146 println(P.parse_all("(((()()))()))"))
   147 println(P.parse_all(")("))
   147 println(P.parse_all(")("))
   148 println(P.parse_all("()"))
   148 println(P.parse_all("()"))
   170 
   170 
   171 
   171 
   172 // with parser combinators (and other parsing algorithms)
   172 // with parser combinators (and other parsing algorithms)
   173 // no left-recursion is allowed, otherwise the will loop
   173 // no left-recursion is allowed, otherwise the will loop
   174 
   174 
   175 lazy val EL: Parser[String, Int] = 
   175 lazy val EL: Parser[String, Int] =
   176   ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || 
   176   ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} ||
   177    (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} ||
   177    (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} ||
   178    (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} ||
   178    (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} ||
   179    NumParserInt)
   179    NumParserInt)
   180 
   180 
   181 // this will run forever:
   181 // this will run forever:
   232 
   232 
   233 (One || Two).parse("aaa")
   233 (One || Two).parse("aaa")
   234 
   234 
   235 
   235 
   236 
   236 
   237 // a problem with the arithmetic expression parser: it 
   237 // a problem with the arithmetic expression parser: it
   238 // gets very slow with deeply nested parentheses
   238 // gets very slow with deeply nested parentheses
   239 
   239 
   240 println("A runtime problem")
   240 println("A runtime problem")
   241 println(E.parse("1"))
   241 println(E.parse("1"))
   242 println(E.parse("(1)"))
   242 println(E.parse("(1)"))