# HG changeset patch # User Christian Urban # Date 1540425058 -3600 # Node ID a4646557016def2b6550c568f07445221476d8e8 # Parent 5ddedcd92d84779daaea9f466ae56e72f969bbc7 updated diff -r 5ddedcd92d84 -r a4646557016d handouts/ho06.pdf Binary file handouts/ho06.pdf has changed diff -r 5ddedcd92d84 -r a4646557016d handouts/ho06.tex --- a/handouts/ho06.tex Wed Oct 24 20:37:37 2018 +0100 +++ b/handouts/ho06.tex Thu Oct 25 00:50:58 2018 +0100 @@ -2,7 +2,7 @@ \documentclass{article} \usepackage{../style} \usepackage{../langs} - +\usepackage{../grammar} \begin{document} @@ -539,11 +539,11 @@ notation for this parser combinator. Therefore we will write \texttt{p ==> f} for it. -What are semantic actions good for? Ultimately, they allow is to -transform the parsed input into a datastructure we can use for further +What are semantic actions good for? Well, they allow is to transform +the parsed input into a datastructure we can use for further processing. A simple example would be to transform parsed characters into ASCII numbers. Suppose we define a function \texttt{f} (from -characters to ints) and a \texttt{CharParser}. +characters to ints) and use a \texttt{CharParser} for the character \texttt{c}. \begin{center} \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] @@ -553,7 +553,7 @@ \end{center} \noindent -Then +Then we can run the following parsers on the input \texttt{cbd}: \begin{center} \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] @@ -563,8 +563,67 @@ \end{center} \noindent -the first line produces \texttt{Set(('c', "bd"))}, whereas the second -produces \texttt{Set((99, "bd"))}. +The first line we obtain the result \texttt{Set(('c', "bd"))}, whereas the second +produces \texttt{Set((99, "bd"))}---the character has been transformed into +an ASCII number. + +A slightly less contrived example is about parsing numbers (recall +\texttt{NumParser} above). However, we want to do this here for strings. +For this assume we have the following \texttt{RegexParser}. + +\begin{center} + \begin{lstlisting}[language=Scala,xleftmargin=0mm, + basicstyle=\small\ttfamily, numbers=none] +import scala.util.matching.Regex + +case class RegexParser(reg: Regex) extends Parser[String, String] { + def parse(in: String) = reg.findPrefixMatchOf(in) match { + case None => Set() + case Some(m) => Set((m.matched, m.after.toString)) + } +} +\end{lstlisting} +\end{center} + +\noindent +This parser takes a regex as argument and splits up a string into a +prefix and the rest according to this regex +(\texttt{reg.findPrefixMatchOf} generates a match---in the successful +case---and the corresponding strings can be extracted with +\texttt{matched} and \texttt{after}). We can now define a +\texttt{NumParser} for strings as follows: + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +val NumParser = RegexParser("[0-9]+".r) +\end{lstlisting} +\end{center} + +\noindent +This parser will recognise a number at the beginning of a string, for +example + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +NumParser.parse("123abc") +\end{lstlisting} +\end{center} + +\noindent +produces \texttt{Set((123,abc))}. The problem is that \texttt{123} is +still a string. We need to convert it into the corresponding +\texttt{Int}. We can do this as follows + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +(NumParser ==> (s => s.toInt)).parse("123abc") +\end{lstlisting} +\end{center} + +\noindent +The semantic action in form of a function converts a string into an +\texttt{Int}. Let us come back to semantic actions when we are going +to implement a simple calculator. \subsubsection*{Shorthand notation for parser combinators} @@ -574,6 +633,47 @@ \subsubsection*{How to build parsers using parser combinators?} +The beauty of parser combinators is the ease with which they can be +implemented and how easy it is to translate context-free grammars into +code (though the grammars need to be non-left-recursive). To +demonstrate this recall our context-free grammar for palindromes: + +\begin{plstx}[margin=3cm] +: \meta{P} ::= a\cdot \meta{P}\cdot a | b\cdot \meta{P}\cdot b | a | b | \epsilon\\ +\end{plstx} + +\noindent +Given the parser combinators for alternatives and sequeneces, this grammar should be +straightforward to implement. The first idea would be + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +lazy val Pal : Parser[String, String] = + (("a" ~ Pal ~ "a") | ("b" ~ Pal ~ "b") | "a" | "b" | "") +\end{lstlisting} +\end{center} + +\noindent +Unfortunately, this does not work as it produces a typing error. The +reason is that the parsers \texttt{"a"}, \texttt{"b"} and \texttt{""} +all produce strings and therefore can be put into an alternative +\texttt{...| "a" | "b" | ""}. But both \pcode{"a" ~ Pal ~ "a"} and +\pcode{"b" ~ Pal ~ "b"} produce pairs of the form +$(((\_, \_), \_), \_)$---that is how the sequence parser combinator +nests results when \pcode{\~} is used between two components. The +solution is to use a semantic action that ``flattens'' these pairs and +appends the corresponding strings, like + +\begin{center} +\begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] +lazy val Pal : Parser[String, String] = + (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } | + ("b" ~ Pal ~ "b") ==> { case ((x, y), z) => x + y + z } | + "a" | "b" | "") +\end{lstlisting} +\end{center} + + \subsubsection*{Implementing an Interpreter} %\bigskip diff -r 5ddedcd92d84 -r a4646557016d langs.sty --- a/langs.sty Wed Oct 24 20:37:37 2018 +0100 +++ b/langs.sty Thu Oct 25 00:50:58 2018 +0100 @@ -19,8 +19,8 @@ new,null,object,override,package,% private,protected,requires,return,sealed,% super,this,throw,trait,true,try,% - type,val,var,while,with,yield,write,read},% - otherkeywords={=>,<-,<\%,<:,>:,\#},% + type,val,var,while,with,yield,write,read,lazy},% + otherkeywords={=>,<-,<\%,<:,>:,\#,==>},% sensitive=true,% %directives={Int,Char,Rexp,String,Boolean,BigInt,Unit,List,Set},% %moredelim=*[directive]:,% diff -r 5ddedcd92d84 -r a4646557016d progs/comb1.scala --- a/progs/comb1.scala Wed Oct 24 20:37:37 2018 +0100 +++ b/progs/comb1.scala Thu Oct 25 00:50:58 2018 +0100 @@ -37,22 +37,17 @@ if (sb != "" && sb.head == c) Set((c, sb.tail)) else Set() } -case class StringParser(s: String) extends Parser[String, String] { - def parse(sb: String) = { - val (prefix, suffix) = sb.splitAt(s.length) - if (prefix == s) Set((prefix, suffix)) else Set() +import scala.util.matching.Regex +case class RegexParser(reg: Regex) extends Parser[String, String] { + def parse(sb: String) = reg.findPrefixMatchOf(sb) match { + case None => Set() + case Some(m) => Set((m.matched, m.after.toString)) } } -case object NumParser extends Parser[String, Int] { - val reg = "[0-9]+".r - def parse(sb: String) = reg.findPrefixOf(sb) match { - case None => Set() - case Some(s) => Set(sb.splitAt(s.length) match { - case (x, y) => (x.toInt, y) - }) - } -} +val NumParser = RegexParser("[0-9]+".r) +def StringParser(s: String) = RegexParser(s.r) + // convenience implicit def string2parser(s: String) = StringParser(s) @@ -74,6 +69,13 @@ new SeqParser[String, String, String](s, r) } +val c = new CharParser('c') +lazy val cn = c ==> (c => c.toInt) +val f = (c: Char) => c.toInt + +c.parse("cb") +(c ==> f).parse("cb") + // a parse palindromes lazy val Pal : Parser[String, String] = (("a" ~ Pal ~ "a") ==> { case ((x, y), z) => x + y + z } || diff -r 5ddedcd92d84 -r a4646557016d slides/slides05.tex --- a/slides/slides05.tex Wed Oct 24 20:37:37 2018 +0100 +++ b/slides/slides05.tex Thu Oct 25 00:50:58 2018 +0100 @@ -598,15 +598,17 @@ A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}: \bl{\begin{plstx}[margin=3cm] -: \meta{S} ::= \epsilon\\ : \meta{S} ::= a\cdot\meta{S}\cdot a\\ : \meta{S} ::= b\cdot\meta{S}\cdot b\\ +: \meta{S} ::= a\\ +: \meta{S} ::= b\\ +: \meta{S} ::= \epsilon\\ \end{plstx}}\pause or \bl{\begin{plstx}[margin=2cm] -: \meta{S} ::= \epsilon | a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b \\ +: \meta{S} ::= a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\ \end{plstx}}\pause\bigskip \small