# HG changeset patch # User Christian Urban # Date 1383347985 0 # Node ID 53def1fbf472a7393f0309a52f6b1efc5f421b33 # Parent 3c2653fc8b5a95a2f000e472e37c11c5688e4ea7 updated diff -r 3c2653fc8b5a -r 53def1fbf472 handouts/ho06.pdf Binary file handouts/ho06.pdf has changed diff -r 3c2653fc8b5a -r 53def1fbf472 handouts/ho06.tex --- a/handouts/ho06.tex Fri Nov 01 15:56:17 2013 +0000 +++ b/handouts/ho06.tex Fri Nov 01 23:19:45 2013 +0000 @@ -300,18 +300,59 @@ to implement and closely resemble grammar rules. Imagine that a grammar describes the strings of natural numbers, such as the grammar $N$ shown above. For all such strings we want to generate the parse-trees or later on we actually -want to extract the meaning of these strings, that is the integers ``behind'' these -strings. The parser will therefore be functions of type +want to extract the meaning of these strings, that is the concrete integers ``behind'' +these strings. The parser combinators will be functions of type \begin{center} -\texttt{I $\Rightarrow$ (T, I)} +\texttt{I $\Rightarrow$ Set[(T, I)]} \end{center} \noindent -that is they take as input something of type \texttt{I}, typically a list of token or a string, -and return a pair. +that is they take as input something of type \texttt{I}, typically a list of tokens or a string, +and return a set of pairs. The first component of these pairs corresponds to what the +parser combinator was able to process from the input and the second is the unprocessed +part of the input. As we shall see shortly, a parser combinator might return more than one such pair, +with the idea that there are potentially several ways how to interpret the input. + +The abstract class for parser combinators requires the implementation of the function +\texttt{parse} taking an argument of type \texttt{I} and returns a set of type +\mbox{\texttt{Set[(T, I)]}}. + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +abstract class Parser[I, T] { + def parse(ts: I): Set[(T, I)] + + def parse_all(ts: I): Set[T] = + for ((head, tail) <- parse(ts); if (tail.isEmpty)) + yield head +} +\end{lstlisting} +\end{center} +\noindent +One of the simplest parser combinators recognises just a character, say $c$, +from the beginning of strings. Its behaviour is as follows: +\begin{itemize} +\item if the head of the input string starts with a $c$, it returns + the set $\{(c, \textit{tail of}\; s)\}$ +\item otherwise it returns the empty set $\varnothing$ +\end{itemize} + +\noindent +The input type of this simple parser combinator for characters is +\texttt{String} and the output type \mbox{\texttt{Set[(Char, String)]}}. +The code in Scala is as follows: + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +case class CharParser(c: Char) extends Parser[String, Char] { + def parse(sb: String) = + if (sb.head == c) Set((c, sb.tail)) else Set() +} +\end{lstlisting} +\end{center} \end{document} diff -r 3c2653fc8b5a -r 53def1fbf472 progs/comb1.scala --- a/progs/comb1.scala Fri Nov 01 15:56:17 2013 +0000 +++ b/progs/comb1.scala Fri Nov 01 23:19:45 2013 +0000 @@ -24,6 +24,11 @@ } // atomic parsers +case class CharParser(c: Char) extends Parser[String, Char] { + def parse(sb: String) = + if (sb.head == c) Set((c, sb.tail)) else Set() +} + case class StringParser(s: String) extends Parser[String, String] { def parse(sb: String) = { val (prefix, suffix) = sb.splitAt(s.length)