diff -r 409e5014edde -r c18b991eaad2 handouts/ho06.tex
--- a/handouts/ho06.tex	Fri Oct 30 01:45:03 2020 +0000
+++ b/handouts/ho06.tex	Wed Nov 04 17:34:52 2020 +0000
@@ -4,6 +4,7 @@
 \usepackage{../style}
 \usepackage{../langs}
 \usepackage{../grammar}
+\usepackage{../graphics}
 
 \begin{document}
 
@@ -244,8 +245,8 @@
 \end{center}
 
 \noindent
 Later on we will use a Scala mechanism for introducing some
-more readable shorthand notation for this, like \texttt{"a" |
-  "b"}. Let us look in detail at what this parser combinator produces
+more readable shorthand notation for this, like \texttt{p"a" ||
+  p"b"}. Let us look in detail at what this parser combinator produces
 with some sample strings.
 
 \begin{center}
@@ -336,7 +337,7 @@
 produce the empty set.
 
 With the shorthand notation we shall introduce later for the sequence
-parser combinator, we can write for example \pcode{"a" ~ "b"}, which
+parser combinator, we can write for example \pcode{p"a" ~ p"b"}, which
 is the parser combinator that first recognises the character
 \texttt{a} from a string and then \texttt{b}. Let us look again at
 some examples of how this parser combinator processes some strings:
@@ -360,7 +361,7 @@
 because they do not fit with what the parser is supposed to parse.
 
-A slightly more complicated parser is \pcode{("a" | "b") ~ "c"} which
+A slightly more complicated parser is \pcode{(p"a" || p"b") ~ p"c"} which
 parses as first character either an \texttt{a} or \texttt{b}, followed
 by a \texttt{c}. This parser produces the following outputs.
@@ -374,7 +375,7 @@
 \end{center}
 
 \noindent
-Now consider the parser \pcode{("a" ~ "b") ~ "c"} which parses
+Now consider the parser \pcode{(p"a" ~ p"b") ~ p"c"} which parses
 \texttt{a}, \texttt{b}, \texttt{c} in sequence. This parser produces
 the following outputs.
@@ -392,7 +393,7 @@
 ``missing'' in the sequence we are looking for. The first succeeds but
 notice how the results nest with sequences: the parsed part is a
 nested pair of the form \pcode{((a, b), c)}. If we nest the sequence
-parser differently, say \pcode{"a" ~ ("b" ~ "c")}, then also
+parser differently, say \pcode{p"a" ~ (p"b" ~ p"c")}, then also
 our output pairs nest differently
 
 \begin{center}
@@ -404,7 +405,7 @@
 \noindent
 Two more examples: first consider the parser
-\pcode{("a" ~ "a") ~ "a"} and the input \pcode{aaaa}:
+\pcode{(p"a" ~ p"a") ~ p"a"} and the input \pcode{aaaa}:
 
 \begin{center}
 \begin{tabular}{rcl}
@@ -416,8 +417,8 @@
 \noindent
 Notice again how the results nest deeper and deeper as pairs (the
 last \pcode{a} is in the unprocessed part). To consume everything of
-this string we can use the parser \pcode{(("a" ~ "a") ~ "a") ~
-  "a"}. Then the output is as follows:
+this string we can use the parser \pcode{((p"a" ~ p"a") ~ p"a") ~
+  p"a"}. Then the output is as follows:
 
 \begin{center}
 \begin{tabular}{rcl}
@@ -444,8 +445,8 @@
 nesting of the component parsers.
 
-Consider also carefully that constructing a parser such \pcode{"a" |
-  ("a" ~ "b")} will result in a typing error. The intention with this
+Consider also carefully that constructing a parser such as \pcode{p"a" ||
+  (p"a" ~ p"b")} will result in a typing error. The intention with this
 parser is that we want to parse either an \texttt{a}, or an \texttt{a}
 followed by a \texttt{b}. However, the first parser has as output type
 a single character (recall the type of \texttt{CharParser}), but the
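
The \texttt{p"..."}, \texttt{||} and \texttt{~} notation introduced by the changes above presupposes a string-interpolation parser and the two combinators as methods on parsers. As a point of reference, here is a minimal sketch (in Scala 2 syntax, e.g.\ in the REPL or a script) of how such a setup could look; the names \texttt{StrParser}, \texttt{ParserStringContext} and \texttt{ParserOps} are assumptions made for this sketch and may differ from the course's accompanying code.

\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
// Sketch only: a parser maps an input to a set of (result, rest) pairs.
abstract class Parser[I, T] {
  def parse(in: I): Set[(T, I)]
}

// Assumed name: recognises a literal string at the front of the input.
case class StrParser(s: String) extends Parser[String, String] {
  def parse(in: String): Set[(String, String)] =
    if (in.startsWith(s)) Set((s, in.drop(s.length))) else Set()
}

// Assumed wiring for the p"..." notation: a custom string interpolator.
implicit class ParserStringContext(sc: StringContext) {
  def p(args: Any*): StrParser = StrParser(sc.s(args: _*))
}

// Assumed wiring for || and ~ as methods on every parser.
implicit class ParserOps[I, T](p: Parser[I, T]) {
  def ||(q: => Parser[I, T]): Parser[I, T] = new Parser[I, T] {
    def parse(in: I) = p.parse(in) ++ q.parse(in)      // alternative: union of results
  }
  def ~[S](q: => Parser[I, S]): Parser[I, (T, S)] = new Parser[I, (T, S)] {
    def parse(in: I) =                                 // sequence: results nest as pairs
      for ((o1, r1) <- p.parse(in); (o2, r2) <- q.parse(r1)) yield ((o1, o2), r2)
  }
}
\end{lstlisting}

With definitions along these lines, an expression such as \pcode{(p"a" || p"b") ~ p"c"} has type \texttt{Parser[String, (String, String)]}, which matches the nested-pair outputs discussed above.
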
@@ -462,7 +463,7 @@
 
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
-class FunParser[I, T, S]
+class MapParser[I, T, S]
       (p: => Parser[I, T], f: T => S) extends Parser[I, S] {
   def parse(in: I) =
@@ -480,7 +481,7 @@
 parser outputs. Since this function is of type \texttt{T => S}, we
 obtain a parser with output type \texttt{S}. Again Scala lets us
 introduce some shorthand notation for this parser
-combinator. Therefore we will write short \texttt{p ==> f} for it.
+combinator. Therefore we will write \texttt{p.map(f)} for it as a shorthand.
 
 What are semantic actions good for? Well, they allow you to transform
 the parsed input into datastructures you can use for further
@@ -503,7 +504,7 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 c.parse("cbd")
-(c ==> f).parse("cbd")
+c.map(f).parse("cbd")
 \end{lstlisting}
 \end{center}
@@ -565,7 +566,7 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
-(NumParser ==> (s => s.toInt)).parse("123abc")
+NumParser.map{s => s.toInt}.parse("123abc")
 \end{lstlisting}
 \end{center}
@@ -585,14 +586,14 @@
 \begin{center}
 \begin{tabular}{ll}
-  \pcode{p | q} & alternative parser\\
+  \pcode{p || q} & alternative parser\\
   \pcode{p ~ q} & sequence parser\\
-  \pcode{p ==> f} & semantic action parser
+  \pcode{p.map(f)} & semantic action parser
 \end{tabular}
 \end{center}
 
 \noindent
-as well as to use plain strings for specifying simple string parsers.
+as well as to use string interpolations for specifying simple string parsers.
 The idea is that this shorthand notation allows us to easily translate
 context-free grammars into code. For example recall our context-free
@@ -606,7 +607,7 @@
 Each alternative in this grammar translates into an alternative parser
 combinator. The $\cdot$ can be translated to a sequence parser
 combinator. The parsers for $a$, $b$ and $\epsilon$ can be simply
-written as \texttt{"a"}, \texttt{"b"} and \texttt{""}.
+written as \texttt{p"a"}, \texttt{p"b"} and \texttt{p""}.
 
 \subsubsection*{How to build parsers using parser combinators?}
@@ -620,16 +621,16 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val Pal : Parser[String, String] =
-  (("a" ~ Pal ~ "a") | ("b" ~ Pal ~ "b") | "a" | "b" | "")
+  ((p"a" ~ Pal ~ p"a") || (p"b" ~ Pal ~ p"b") || p"a" || p"b" || p"")
 \end{lstlisting}
 \end{center}
 
 \noindent
 Unfortunately, this does not quite work yet as it produces a typing
-error. The reason is that the parsers \texttt{"a"}, \texttt{"b"} and
-\texttt{""} all produce strings as output type and therefore can be
-put into an alternative \texttt{...| "a" | "b" | ""}. But both
-sequence parsers \pcode{"a" ~ Pal ~ "a"} and \pcode{"b" ~ Pal ~ "b"}
+error. The reason is that the parsers \texttt{p"a"}, \texttt{p"b"} and
+\texttt{p""} all produce strings as output type and therefore can be
+put into an alternative \texttt{...|| p"a" || p"b" || p""}. But both
+sequence parsers \pcode{p"a" ~ Pal ~ p"a"} and \pcode{p"b" ~ Pal ~ p"b"}
 produce pairs of the form
 
 \begin{center}
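
Continuing the sketch above, and again only as an assumption about how the shorthand might be implemented: the \texttt{.map} notation used in the corrected \texttt{Pal} below can be provided as one further method on parsers. It behaves like the \texttt{MapParser} class shown earlier, applying a function to every successful parse result and thereby changing the output type.

\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
// Sketch only: the semantic-action combinator behind p.map(f).
implicit class ParserMapOps[I, T](p: Parser[I, T]) {
  def map[S](f: T => S): Parser[I, S] = new Parser[I, S] {
    def parse(in: I): Set[(S, I)] =
      for ((out, rest) <- p.parse(in)) yield (f(out), rest)  // apply f to each result
  }
}

// A sequence parser produces nested pairs ...
val nested: Parser[String, ((String, String), String)] = p"a" ~ p"b" ~ p"c"
// ... and mapping a pattern-matching function over it collapses them back to
// String, so the result can sit in an alternative next to p"a", p"b" and p"".
val flat: Parser[String, String] = nested.map{ case ((x, y), z) => x + y + z }
\end{lstlisting}
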
@@ -644,15 +645,15 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val Pal : Parser[String, String] =
-  (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } |
-   ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } |
-   "a" | "b" | "")
+  ((p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => x + y + z } ||
+   (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => x + y + z } ||
+   p"a" || p"b" || p"")
 \end{lstlisting}
 \end{center}
 
 \noindent
 How does this work? Well, recall again what the pairs look like for
-the parser \pcode{"a" ~ Pal ~ "a"}. The pattern in the semantic
+the parser \pcode{p"a" ~ Pal ~ p"a"}. The pattern in the semantic
 action matches the nested pairs (the \texttt{x} with the
 \texttt{a}-part and so on). Unfortunately when we have such nested
 pairs, Scala requires us to define the function using the
@@ -670,7 +671,7 @@
 The action we implement above is to concatenate all three strings,
 which means after the semantic action is applied the output type of
 the parser is \texttt{String}, which means it fits with the alternative parsers
-\texttt{...| "a" | "b" | ""}.
+\texttt{...|| p"a" || p"b" || p""}.
 If we run the parser above with \pcode{Pal.parse_all("abaaaba")} we
 obtain as result the \pcode{Set(abaaaba)}, which indicates that the
 string is a palindrome
@@ -772,7 +773,7 @@
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, xleftmargin=0mm, numbers=none]
 lazy val P : Parser[String, String] =
-  "(" ~ P ~ ")" ~ P ==> { case (((_,x),_),y) => "{" + x + "}" + y } | ""
+  (p"(" ~ P ~ p")" ~ P).map{ case (((_,x),_),y) => "{" + x + "}" + y } || p""
 \end{lstlisting}
 \end{center}
@@ -815,10 +816,10 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val E: Parser[String, Int] =
-  (E ~ "+" ~ E ==> { case ((x, y), z) => x + z} |
-   E ~ "-" ~ E ==> { case ((x, y), z) => x - z} |
-   E ~ "*" ~ E ==> { case ((x, y), z) => x * z} |
-   "(" ~ E ~ ")" ==> { case ((x, y), z) => y} |
+  ((E ~ p"+" ~ E).map{ case ((x, y), z) => x + z} ||
+   (E ~ p"-" ~ E).map{ case ((x, y), z) => x - z} ||
+   (E ~ p"*" ~ E).map{ case ((x, y), z) => x * z} ||
+   (p"(" ~ E ~ p")").map{ case ((x, y), z) => y} ||
    NumParserInt)
 \end{lstlisting}
 \end{center}
@@ -870,12 +871,12 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val E: Parser[String, Int] =
-  (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } |
-  (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } | T
+  (T ~ p"+" ~ E).map{ case ((x, y), z) => x + z } ||
+  (T ~ p"-" ~ E).map{ case ((x, y), z) => x - z } || T
 lazy val T: Parser[String, Int] =
-  (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } | F
+  (F ~ p"*" ~ T).map{ case ((x, y), z) => x * z } || F
 lazy val F: Parser[String, Int] =
-  ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParserInt
+  (p"(" ~ E ~ p")").map{ case ((x, y), z) => y } || NumParserInt
 \end{lstlisting}
 \end{center}
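
One last gap worth noting for readers who want to run the expression grammars above: \texttt{NumParserInt} and the \texttt{parse\_all} method (used earlier with \texttt{Pal}) are not part of this change set. Under the same assumptions as the sketches above, they could be defined roughly as follows; \texttt{RegexParser} and \texttt{ParseAllOps} are again placeholder names.

\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
// Sketch only: possible definitions for NumParserInt and parse_all.
import scala.util.matching.Regex

// Assumed name: recognises the longest prefix matching a regular expression.
case class RegexParser(reg: Regex) extends Parser[String, String] {
  def parse(in: String): Set[(String, String)] = reg.findPrefixOf(in) match {
    case None    => Set()
    case Some(s) => Set((s, in.drop(s.length)))
  }
}

val NumParser    = RegexParser("[0-9]+".r)   // a non-empty sequence of digits ...
val NumParserInt = NumParser.map(_.toInt)    // ... converted to an Int

// Assumed name: parse_all keeps only results that consume the whole input.
implicit class ParseAllOps[T](p: Parser[String, T]) {
  def parse_all(in: String): Set[T] =
    for ((out, rest) <- p.parse(in); if rest.isEmpty) yield out
}
\end{lstlisting}

With these in place, the unambiguous grammar above should, for instance, give \pcode{E.parse_all("1+2*3")} the value \pcode{Set(7)}.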