updated
authorChristian Urban <christian.urban@kcl.ac.uk>
Sat, 11 Nov 2023 10:08:33 +0000
changeset 954 eda0ccf56c72
parent 953 5e070fb0332a
child 955 47acfd7f9096
updated
hws/hw06.pdf
hws/hw06.tex
progs/parser-combinators/comb1.sc
progs/parser-combinators/comb2.sc
slides/slides06.pdf
slides/slides06.tex
Binary file hws/hw06.pdf has changed
--- a/hws/hw06.tex	Sat Nov 04 18:28:09 2023 +0000
+++ b/hws/hw06.tex	Sat Nov 11 10:08:33 2023 +0000
@@ -1,6 +1,7 @@
 \documentclass{article}
 \usepackage{../style}
 \usepackage{../graphics}
+\usepackage{../grammar}
 
 \begin{document}
 
@@ -28,20 +29,38 @@
 
 \item Suppose the grammar
 
-\begin{center}
-\begin{tabular}{lcl}
-$E$ & $\rightarrow$ &  $F \;|\; F \cdot * \cdot F \;|\; F \cdot \backslash \cdot F$\\
-$F$ & $\rightarrow$ & $T \;|\; T \cdot \texttt{+} \cdot T \;|\; T \cdot \texttt{-} \cdot T$\\
-$T$ & $\rightarrow$ & $num \;|\; \texttt{(} \cdot E \cdot \texttt{)}$\\
-\end{tabular}
-\end{center}
+\begin{plstx}[margin=1cm]    
+:\meta{E} ::= \meta{F} \;|\; \meta{F} \cdot \texttt{*} \cdot \meta{F} \;|\; \meta{F} \cdot \backslash \cdot \meta{F}\\
+:\meta{F} ::= \meta{T} \;|\; \meta{T} \cdot \texttt{+} \cdot \meta{T} \;|\; \meta{T} \cdot \texttt{-} \cdot \meta{T}\\
+:\meta{T} ::= $num$ \;|\; \texttt{(} \cdot E \cdot \texttt{)}\\
+\end{plstx}
 
-where $E$, $F$ and $T$ are non-terminals, $E$ is the starting symbol of the grammar, and $num$ stands for
-a number token. Give a parse tree for the string \texttt{(3+3)+(2*3)}.
+where \meta{E}, \meta{F} and \meta{T} are non-terminals, \meta{E} is
+the starting symbol of the grammar, and $num$ stands for a number
+token. Give a parse tree for the string \texttt{(3+3)+(2*3)}. Note that
+\meta{F} and \meta{T} are ``exchanged'' in this grammar in comparison with
+the usual grammar for arithmetic expressions. What does this mean in terms
+of precedences of the operators?
+
+\solution{
+  For the second part "1+2*3" will be parsed as (1+2)*3, meaning + and - bind
+  tighter than * and $\backslash$
+}
 
 \item Define what it means for a grammar to be ambiguous. Give an example of
 an ambiguous grammar.
 
+\solution{
+  Already the grammar
+  \begin{plstx}[margin=1cm]
+    : \meta{E} ::= \meta{E}\cdot +\cdot\meta{E} | num\\
+  \end{plstx}
+
+  is ambiguous because a string like "1+2+3" can be parsed
+  as (1+2)+3 or 1+(2+3).
+  }
+
+
 \item Suppose boolean expressions are built up from
 
 \begin{center}
@@ -61,17 +80,34 @@
 
 \item Parsing combinators consist of atomic parsers, alternative
   parsers, sequence parsers and semantic actions.  What is the purpose
-  of (1) atomic parsers and of (2) semantic actions?
+  of (1) atomic parsers and of (2) map-parsers (also called semantic actions)?
 
+  \solution{
+    Atomic parsers look at a concrete prefix of the input, like num-tokens or identifiers.
+    Map-parsers apply a function to the output of a parser. In this way you can transform
+    the output from, for example, a string to an integer.
+  }
+
+  
 \item Parser combinators can directly be given a string as
       input, without the need of a lexer. What are the
       advantages to first lex a string and then feed a
       sequence of tokens as input to the parser?
 
-% Reason 1 you can filter out whitespaces and comments, which makes the grammar rules simpler. If you have to make sure that a whitespace comes after a variable say, then your parser rule for variables  gets more complicated. Same with comments which do not contribute anything to the parser tree.
-% Reason 2) The lexer can already classify tokens, for example as numbers, keywords or identifiers. This again makes the grammar rules more deterministic and as a result faster to parse.
-% The point is that parser combinators can be used to process strings, but in case of compilers where whitespaces and comments need to be filtered out, the lexing phase is actually quite useful.
+      \solution{ Reason 1 you can filter out whitespaces and comments,
+        which makes the grammar rules simpler. If you have to make
+        sure that a whitespace comes after a variable say, then your
+        parser rule for variables gets more complicated. Same with
+        comments which do not contribute anything to the parse tree.
+        
+        Reason 2) The lexer can already classify tokens, for example
+        as numbers, keywords or identifiers. This again makes the grammar
+        rules more deterministic and as a result faster to parse.
 
+        The point is that parser combinators can be used to process
+        strings, but in case of compilers where whitespaces and
+        comments need to be filtered out, the lexing phase is actually
+        quite useful.  }
       
 \item The injection function for sequence regular expressions is defined
       by three clauses:
@@ -84,8 +120,20 @@
 \end{tabular}
 \end{center}
 
-Explain why there are three cases in the injection function for sequence
-regular expressions. 
+     Explain why there are three cases in the injection function for sequence
+     regular expressions.
+
+     \solution{
+       This is because the derivative of sequences can be of the form
+
+       \begin{itemize}
+       \item $(der\,c\,r_1)\cdot r_2$
+       \item $(der\,c\,r_1)\cdot r_2 \;+\; der\,c\,r_2$  
+       \end{itemize}
+
+       In the first case the value needs to be of the form $Seq$, in the second case $\Left$ or $Right$.
+       Therefore 3 cases.
+       }
       
 \item \POSTSCRIPT        
 \end{enumerate}
--- a/progs/parser-combinators/comb1.sc	Sat Nov 04 18:28:09 2023 +0000
+++ b/progs/parser-combinators/comb1.sc	Sat Nov 11 10:08:33 2023 +0000
@@ -20,6 +20,7 @@
         if is(tl).isEmpty) yield hd
 }
 
+
 // parser combinators
 
 
@@ -39,7 +40,7 @@
 
 // map parser
 class MapParser[I : IsSeq, T, S](p: => Parser[I, T], 
-                         f: T => S) extends Parser[I, S] {
+                                 f: T => S) extends Parser[I, S] {
   def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl)
 }
 
@@ -51,7 +52,11 @@
     if (in != "" && in.head == c) Set((c, in.tail)) else Set()
 }
 
-CharParser('a').parse("abc")
+val ap = CharParser('a')
+val bp = CharParser('b')
+
+val abp = SeqParser(ap, bp)
+MapParser(abp, ab => s"$ab").parse("abc")
 
 // an atomic parser for parsing strings according to a regex
 import scala.util.matching.Regex
@@ -67,14 +72,14 @@
 val NumParser = RegexParser("[0-9]+".r)
 def StrParser(s: String) = RegexParser(Regex.quote(s).r)
 
-NumParser.parse("a123a123bc") 
+NumParser.parse("123a123bc") 
 StrParser("else").parse("elsethen")
 
 
 // NumParserInt transforms a "string integer" into a propper Int
 // (needs "new" because MapParser is not a case class)
 
-val NumParserInt = new MapParser(NumParser, (s: String) => s.toInt)
+val NumParserInt = MapParser(NumParser, (s: String) => s.toInt)
 NumParserInt.parse("123abc")
 
 // the following string interpolation allows us to write 
@@ -111,6 +116,7 @@
 
 val NumParserInt2 = NumParser.map(_.toInt)
 
+val x = 1 + 3
 
 // A parser for palindromes (just returns them as string)
 //  since the parser is recursive it needs to be lazy
@@ -121,8 +127,8 @@
 }  
 
 // examples
-Pal.parse_all("abaaaba")
-Pal.parse("abaaaba")
+Pal.parse_all("abacaba")
+Pal.parse("abacaaba")
 
 println("Palindrome: " + Pal.parse_all("abaaaba"))
 
@@ -143,6 +149,9 @@
 
 // A parser for arithmetic expressions (Terms and Factors)
 
+"1 + 2 * 3"
+"(1 + 2) * 3"
+{
 lazy val E: Parser[String, Int] = {
   (T ~ p"+" ~ E).map{ case ((x, _), z) => x + z } ||
   (T ~ p"-" ~ E).map{ case ((x, _), z) => x - z } || T }
@@ -150,12 +159,12 @@
   (F ~ p"*" ~ T).map{ case ((x, _), z) => x * z } || F }
 lazy val F: Parser[String, Int] = {
   (p"(" ~ E ~ p")").map{ case ((_, y), _) => y } || NumParserInt }
-
+}
 println(E.parse_all("1+3+4"))
 println(E.parse("1+3+4"))
 println(E.parse_all("4*2+3"))
 println(E.parse_all("4*(2+3)"))
-println(E.parse_all("(4)*((2+3))"))
+println(E.parse_all("(4)*(((2+3)))"))
 println(E.parse_all("4/2+3"))
 println(E.parse("1 + 2 * 3"))
 println(E.parse_all("(1+2)+3"))
--- a/progs/parser-combinators/comb2.sc	Sat Nov 04 18:28:09 2023 +0000
+++ b/progs/parser-combinators/comb2.sc	Sat Nov 11 10:08:33 2023 +0000
@@ -16,7 +16,8 @@
 
 case class ~[+A, +B](x: A, y: B)
 
-
+val a = (1, "2")
+val v = new ~(1, "2")
 
 type IsSeq[I] = I => Seq[_]
 
Binary file slides/slides06.pdf has changed
--- a/slides/slides06.tex	Sat Nov 04 18:28:09 2023 +0000
+++ b/slides/slides06.tex	Sat Nov 11 10:08:33 2023 +0000
@@ -79,6 +79,24 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[t]
+\frametitle{Palindromes}
+
+A grammar for palindromes over the alphabet~\bl{$\{a,b\}$}:
+
+\only<1>{%
+\bl{\begin{plstx}[margin=1cm]
+: \meta{S} ::=  a\cdot \meta{S}\cdot a | b\cdot \meta{S}\cdot b | a | b | \epsilon\\
+\end{plstx}}}
+
+%\small
+%Can you find the grammar rules for matched parentheses?
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
 \frametitle{Parser Combinators}
 
@@ -103,6 +121,17 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
+\frametitle{Abstract Parser Class}
+
+\small
+\lstinputlisting[language=Scala,xleftmargin=1mm]
+ {../progs/app7.scala}
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
 
 Atomic parsers, for example, number tokens