diff -r 64ec1884d860 -r c7009356ddd8 progs/parser-combinators/comb1.sc --- a/progs/parser-combinators/comb1.sc Wed Feb 21 09:14:12 2024 +0000 +++ b/progs/parser-combinators/comb1.sc Wed May 29 13:25:30 2024 +0100 @@ -5,18 +5,18 @@ // // amm comb1.sc - + // Note, in the lectures I did not show the type bound -// using is: I => Seq[_], which means that the input -// type 'I' needs to be a sequence. +// using is: I => Seq[_], which means that the input +// type 'I' needs to be a sequence. -type IsSeq[I] = I => Seq[_] +type IsSeq[I] = I => Seq[?] -abstract class Parser[I, T](using is: IsSeq[I]) { - def parse(in: I): Set[(T, I)] +abstract class Parser[I: IsSeq, T](using is: IsSeq[I]) { + def parse(in: I): Set[(T, I)] def parse_all(in: I) : Set[T] = - for ((hd, tl) <- parse(in); + for ((hd, tl) <- parse(in); if is(tl).isEmpty) yield hd } @@ -25,21 +25,21 @@ // alternative parser -class AltParser[I : IsSeq, T](p: => Parser[I, T], +class AltParser[I : IsSeq, T](p: => Parser[I, T], q: => Parser[I, T]) extends Parser[I, T] { - def parse(in: I) = p.parse(in) ++ q.parse(in) + def parse(in: I) = p.parse(in) ++ q.parse(in) } // sequence parser -class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], +class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], q: => Parser[I, S]) extends Parser[I, (T, S)] { - def parse(in: I) = - for ((hd1, tl1) <- p.parse(in); + def parse(in: I) = + for ((hd1, tl1) <- p.parse(in); (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2) } // map parser -class MapParser[I : IsSeq, T, S](p: => Parser[I, T], +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], f: T => S) extends Parser[I, S] { def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } @@ -48,7 +48,7 @@ // an example of an atomic parser for characters case class CharParser(c: Char) extends Parser[String, Char] { - def parse(in: String) = + def parse(in: String) = if (in != "" && in.head == c) Set((c, in.tail)) else Set() } @@ -64,15 +64,15 @@ case class RegexParser(reg: Regex) extends Parser[String, String] { def parse(in: String) = reg.findPrefixMatchOf(in) match { case None => Set() - case Some(m) => Set((m.matched, m.after.toString)) + case Some(m) => Set((m.matched, m.after.toString)) } } -// atomic parsers for numbers and "verbatim" strings +// atomic parsers for numbers and "verbatim" strings val NumParser = RegexParser("[0-9]+".r) def StrParser(s: String) = RegexParser(Regex.quote(s).r) -NumParser.parse("123a123bc") +NumParser.parse("123a123bc") StrParser("else").parse("elsethen") @@ -82,16 +82,16 @@ val NumParserInt = MapParser(NumParser, (s: String) => s.toInt) NumParserInt.parse("123abc") -// the following string interpolation allows us to write -// StrParser(_some_string_) more conveniently as +// the following string interpolation allows us to write +// StrParser(_some_string_) more conveniently as // -// p"<_some_string_>" +// p"<_some_string_>" -extension (sc: StringContext) - def p(args: Any*) = StrParser(sc.s(args:_*)) +extension (sc: StringContext) + def p(args: Any*) = StrParser(sc.s(args*)) -(p"else").parse("elsethen") +(p"else").parse("elsethen") // more convenient syntax for parser combinators extension [I: IsSeq, T](p: Parser[I, T]) { @@ -100,11 +100,11 @@ def map[S](f: => T => S) = new MapParser[I, T, S](p, f) } -// simple example of transforming the +// simple example of transforming the // result into capital letters def toU(s: String) = s.map(_.toUpper) -(p"else").map(toU(_)).parse("elseifthen") +(p"else").map(toU(_)).parse("elseifthen") // these implicits allow us to use an infix notation for // sequences and alternatives; we also can write the usual @@ -121,10 +121,10 @@ // A parser for palindromes (just returns them as string) // since the parser is recursive it needs to be lazy lazy val Pal : Parser[String, String] = { - (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || - (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || + (p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => s"$x$y$z" } || + (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => s"$x$y$z" } || p"a" || p"b" || p"" -} +} // examples Pal.parse_all("abacaba") @@ -132,7 +132,7 @@ println("Palindrome: " + Pal.parse_all("abaaaba")) -// A parser for wellnested parentheses +// A parser for wellnested parentheses // // P ::= ( P ) P | epsilon // @@ -140,7 +140,7 @@ lazy val P : Parser[String, String] = { (p"(" ~ P ~ p")" ~ P).map{ case (((_, x), _), y) => "{" + x + "}" + y } || p"" -} +} println(P.parse_all("(((()()))())")) println(P.parse_all("(((()()))()))")) @@ -172,8 +172,8 @@ // with parser combinators (and other parsing algorithms) // no left-recursion is allowed, otherwise the will loop -lazy val EL: Parser[String, Int] = - ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || +lazy val EL: Parser[String, Int] = + ((EL ~ p"+" ~ EL).map{ case ((x, y), z) => x + z} || (EL ~ p"*" ~ EL).map{ case ((x, y), z) => x * z} || (p"(" ~ EL ~ p")").map{ case ((x, y), z) => y} || NumParserInt) @@ -234,7 +234,7 @@ -// a problem with the arithmetic expression parser: it +// a problem with the arithmetic expression parser: it // gets very slow with deeply nested parentheses println("A runtime problem")