handouts/ho06.tex
changeset 799 85267be9a5ed
parent 595 4bf0096bc06b
child 937 dc5ab66b11cc
--- a/handouts/ho06.tex	Fri Oct 30 01:45:03 2020 +0000
+++ b/handouts/ho06.tex	Wed Nov 04 17:34:52 2020 +0000
@@ -4,6 +4,7 @@
 \usepackage{../style}
 \usepackage{../langs}
 \usepackage{../grammar}
+\usepackage{../graphics}
 
 \begin{document}
 
@@ -244,8 +245,8 @@
 \end{center}
 
 \noindent Later on we will use Scala mechanism for introducing some
-more readable shorthand notation for this, like \texttt{"a" |
-  "b"}. Let us look in detail at what this parser combinator produces
+more readable shorthand notation for this, like \texttt{p"a" ||
+  p"b"}. Let us look in detail at what this parser combinator produces
 with some sample strings.
 
 \begin{center}
@@ -336,7 +337,7 @@
 produce the empty set.
 
 With the shorthand notation we shall introduce later for the sequence
-parser combinator, we can write for example \pcode{"a" ~ "b"}, which
+parser combinator, we can write for example \pcode{p"a" ~ p"b"}, which
 is the parser combinator that first recognises the character
 \texttt{a} from a string and then \texttt{b}. Let us look again at
 some examples of how this parser combinator processes some strings:
@@ -360,7 +361,7 @@
 because they do not fit with what the parser is supposed to parse.
 
 
-A slightly more complicated parser is \pcode{("a" | "b") ~ "c"} which
+A slightly more complicated parser is \pcode{(p"a" || p"b") ~ p"c"} which
 parses as first character either an \texttt{a} or \texttt{b}, followed
 by a \texttt{c}. This parser produces the following outputs.
 
@@ -374,7 +375,7 @@
 \end{center}
 
 \noindent
-Now consider the parser \pcode{("a" ~ "b") ~ "c"} which parses
+Now consider the parser \pcode{(p"a" ~ p"b") ~ p"c"} which parses
 \texttt{a}, \texttt{b}, \texttt{c} in sequence. This parser produces
 the following outputs.
 
@@ -392,7 +393,7 @@
 ``missing'' in the sequence we are looking for. The first succeeds but
 notice how the results nest with sequences: the parsed part is a
 nested pair of the form \pcode{((a, b), c)}. If we nest the sequence
-parser differently, say \pcode{"a" ~ ("b" ~ "c")}, then also
+parser differently, say \pcode{p"a" ~ (p"b" ~ p"c")}, then also
 our output pairs nest differently
 
 \begin{center}
@@ -404,7 +405,7 @@
 
 \noindent
 Two more examples: first consider the parser
-\pcode{("a" ~ "a") ~ "a"} and the input \pcode{aaaa}:
+\pcode{(p"a" ~ p"a") ~ p"a"} and the input \pcode{aaaa}:
 
 \begin{center}
 \begin{tabular}{rcl}
@@ -416,8 +417,8 @@
 
 \noindent Notice again how the results nest deeper and deeper as pairs (the
 last \pcode{a} is in the unprocessed part). To consume everything of
-this string we can use the parser \pcode{(("a" ~ "a") ~ "a") ~
-  "a"}. Then the output is as follows:
+this string we can use the parser \pcode{((p"a" ~ p"a") ~ p"a") ~
+  p"a"}. Then the output is as follows:
 
 \begin{center}
 \begin{tabular}{rcl}
@@ -444,8 +445,8 @@
 nesting of the component parsers.
 
 
-Consider also carefully that constructing a parser such \pcode{"a" |
-  ("a" ~ "b")} will result in a typing error. The intention with this
+Consider also carefully that constructing a parser such \pcode{p"a" ||
+  (p"a" ~ p"b")} will result in a typing error. The intention with this
 parser is that we want to parse either an \texttt{a}, or an \texttt{a}
 followed by a \texttt{b}. However, the first parser has as output type
 a single character (recall the type of \texttt{CharParser}), but the
@@ -462,7 +463,7 @@
 
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
-class FunParser[I, T, S]
+class MapParser[I, T, S]
          (p: => Parser[I, T], 
           f: T => S) extends Parser[I, S] {
   def parse(in: I) = 
@@ -480,7 +481,7 @@
 parser outputs. Since this function is of type \texttt{T => S}, we
 obtain a parser with output type \texttt{S}. Again Scala lets us
 introduce some shorthand notation for this parser
-combinator. Therefore we will write short \texttt{p ==> f} for it.
+combinator. Therefore we will write short \texttt{p.map(f)} for it.
 
 What are semantic actions good for? Well, they allow you to transform
 the parsed input into datastructures you can use for further
@@ -503,7 +504,7 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 c.parse("cbd")
-(c ==> f).parse("cbd")
+c.map(f).parse("cbd")
 \end{lstlisting}
 \end{center}
 
@@ -565,7 +566,7 @@
 
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
-(NumParser ==> (s => s.toInt)).parse("123abc")
+NumParser.map{s => s.toInt}.parse("123abc")
 \end{lstlisting}
 \end{center}  
 
@@ -585,14 +586,14 @@
 
 \begin{center}
 \begin{tabular}{ll}  
-  \pcode{p | q} & alternative parser\\
+  \pcode{p || q} & alternative parser\\
   \pcode{p ~ q} & sequence parser\\ 
-  \pcode{p ==> f} & semantic action parser
+  \pcode{p.map(f)} & semantic action parser
 \end{tabular}
 \end{center}
 
 \noindent
-as well as to use plain strings for specifying simple string parsers.
+as well as to use string interpolations for specifying simple string parsers.
 
 The idea is that this shorthand notation allows us to easily translate
 context-free grammars into code. For example recall our context-free
@@ -606,7 +607,7 @@
 Each alternative in this grammar translates into an alternative parser
 combinator.  The $\cdot$ can be translated to a sequence parser
 combinator. The parsers for $a$, $b$ and $\epsilon$ can be simply
-written as \texttt{"a"}, \texttt{"b"} and \texttt{""}.
+written as \texttt{p"a"}, \texttt{p"b"} and \texttt{p""}.
 
 
 \subsubsection*{How to build parsers using parser combinators?}
@@ -620,16 +621,16 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val Pal : Parser[String, String] = 
-  (("a" ~ Pal ~ "a") | ("b" ~ Pal ~ "b") | "a" | "b" | "")
+  ((p"a" ~ Pal ~ p"a") || (p"b" ~ Pal ~ p"b") || p"a" || p"b" || p"")
 \end{lstlisting}
 \end{center}
 
 \noindent
 Unfortunately, this does not quite work yet as it produces a typing
-error. The reason is that the parsers \texttt{"a"}, \texttt{"b"} and
-\texttt{""} all produce strings as output type and therefore can be
-put into an alternative \texttt{...| "a" | "b" | ""}. But both
-sequence parsers \pcode{"a" ~ Pal ~ "a"} and \pcode{"b" ~ Pal ~ "b"}
+error. The reason is that the parsers \texttt{p"a"}, \texttt{p"b"} and
+\texttt{p""} all produce strings as output type and therefore can be
+put into an alternative \texttt{...|| p"a" || p"b" || p""}. But both
+sequence parsers \pcode{p"a" ~ Pal ~ p"a"} and \pcode{p"b" ~ Pal ~ p"b"}
 produce pairs of the form
 
 \begin{center}
@@ -644,15 +645,15 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val Pal : Parser[String, String] =  
-  (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } |
-   ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } |
-    "a" | "b" | "")
+  ((p"a" ~ Pal ~ p"a").map{ case ((x, y), z) => x + y + z } ||
+   (p"b" ~ Pal ~ p"b").map{ case ((x, y), z) => x + y + z } ||
+    p"a" || p"b" || p"")
 \end{lstlisting}
 \end{center}
 
 \noindent
 How does this work? Well, recall again what the pairs look like for
-the parser \pcode{"a" ~ Pal ~ "a"}.  The pattern in the semantic
+the parser \pcode{p"a" ~ Pal ~ p"a"}.  The pattern in the semantic
 action matches the nested pairs (the \texttt{x} with the
 \texttt{a}-part and so on).  Unfortunately when we have such nested
 pairs, Scala requires us to define the function using the
@@ -670,7 +671,7 @@
 The action we implement above is to concatenate all three strings, which
 means after the semantic action is applied the output type of the parser 
 is \texttt{String}, which means it fits with the alternative parsers
-\texttt{...| "a" | "b" | ""}.
+\texttt{...|| p"a" || p"b" || p""}.
 
 If we run the parser above with \pcode{Pal.parse_all("abaaaba")} we obtain
 as result the \pcode{Set(abaaaba)}, which indicates that the string is a palindrome
@@ -772,7 +773,7 @@
   \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily,
     xleftmargin=0mm, numbers=none]
 lazy val P : Parser[String, String] = 
-  "(" ~ P ~ ")" ~ P ==> { case (((_,x),_),y) => "{" + x + "}" + y } | ""
+  (p"(" ~ P ~ p")" ~ P).map{ case (((_,x),_),y) => "{" + x + "}" + y } || p""
 \end{lstlisting}
 \end{center}
 
@@ -815,10 +816,10 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val E: Parser[String, Int] = 
-  (E ~ "+" ~ E ==> { case ((x, y), z) => x + z} |
-   E ~ "-" ~ E ==> { case ((x, y), z) => x - z} |
-   E ~ "*" ~ E ==> { case ((x, y), z) => x * z} |
-   "(" ~ E ~ ")" ==> { case ((x, y), z) => y} |
+  ((E ~ p"+" ~ E).map{ case ((x, y), z) => x + z} ||
+   (E ~ p"-" ~ E).map{ case ((x, y), z) => x - z} ||
+   (E ~ p"*" ~ E).map{ case ((x, y), z) => x * z} ||
+   (p"(" ~ E ~ p")").map{ case ((x, y), z) => y} ||
    NumParserInt)
 \end{lstlisting}
 \end{center}
@@ -870,12 +871,12 @@
 \begin{center}
 \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none]
 lazy val E: Parser[String, Int] = 
-  (T ~ "+" ~ E) ==> { case ((x, y), z) => x + z } |
-  (T ~ "-" ~ E) ==> { case ((x, y), z) => x - z } | T 
+  (T ~ p"+" ~ E).map{ case ((x, y), z) => x + z } ||
+  (T ~ p"-" ~ E).map{ case ((x, y), z) => x - z } || T 
 lazy val T: Parser[String, Int] = 
-  (F ~ "*" ~ T) ==> { case ((x, y), z) => x * z } | F
+  (F ~ p"*" ~ T).map{ case ((x, y), z) => x * z } || F
 lazy val F: Parser[String, Int] = 
-  ("(" ~ E ~ ")") ==> { case ((x, y), z) => y } | NumParserInt
+  (p"(" ~ E ~ p")").map{ case ((x, y), z) => y } || NumParserInt
 \end{lstlisting}
 \end{center}