# HG changeset patch # User Christian Urban # Date 1697206057 -3600 # Node ID 66adcae6c7620d9b4bae362d8954f89b5230b693 # Parent 46eee459a999f64a01ab194c8c67cacf171e76c0 updated diff -r 46eee459a999 -r 66adcae6c762 handouts/ho04.pdf Binary file handouts/ho04.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 handouts/ho04.tex --- a/handouts/ho04.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/handouts/ho04.tex Fri Oct 13 15:07:37 2023 +0100 @@ -513,7 +513,7 @@ $r_2$ or $r_{2s}$. Unfortunately, this is still not the right value in general because there might be some simplifications that happened inside $r_2$ and for which the simplification -function returned also a rectification function $f_{2s}$. So in +function also returned a rectification function $f_{2s}$. So in fact we need to apply this one too which gives \begin{center} @@ -894,7 +894,7 @@ been taught this language. Anyway, this language had over 600 keywords (or what they called \emph{reserved words}). Interestingly though this language is still used in 2020: during the height of - Corona crisis the State of New Jewrsey in the US was looking for + Corona crisis the State of New Jersey in the US was looking for COBOL programers who could fix the state's national insurance webpage. You were probably paid in gold and diamonds, if you were able to program in COBOL. If you fixed their webpage, surely you diff -r 46eee459a999 -r 66adcae6c762 handouts/ho05.pdf Binary file handouts/ho05.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 handouts/ho05.tex --- a/handouts/ho05.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/handouts/ho05.tex Fri Oct 13 15:07:37 2023 +0100 @@ -82,7 +82,7 @@ \end{center} \noindent -from natural languages were the meaning of \emph{flies} depends on the +from natural languages where the meaning of \emph{flies} depends on the surrounding \emph{context} are avoided as much as possible. Here is an interesting video about C++ not being a context-free language @@ -368,7 +368,7 @@ \noindent The point is that we can in principle transform every left-recursive -grammar into one that is non-left-recursive one. This explains why often +grammar into one that is non-left-recursive. This explains why often the following grammar is used for arithmetic expressions: \begin{plstx}[margin=1cm] @@ -380,7 +380,7 @@ \noindent In this grammar all $\meta{E}$xpressions, $\meta{T}$erms and $\meta{F}$actors are in some way protected from being -left-recusive. For example if you start $\meta{E}$ you can derive +left-recursive. For example if you start $\meta{E}$ you can derive another one by going through $\meta{T}$, then $\meta{F}$, but then $\meta{E}$ is protected by the open-parenthesis in the last rule. @@ -408,7 +408,7 @@ $\epsilon$-rules from grammars. Consider again the grammar above for binary numbers where have a rule $\meta{B'} ::= \epsilon$. In this case we look for rules of the (generic) form \mbox{$\meta{A} := -\alpha\cdot\meta{B'}\cdot\beta$}. That is there are rules that use +\alpha\cdot\meta{B'}\cdot\beta$}. That is, there are rules that use $\meta{B'}$ and something ($\alpha$) is in front of $\meta{B'}$ and something follows ($\beta$). Such rules need to be replaced by additional rules of the form \mbox{$\meta{A} := \alpha\cdot\beta$}. @@ -419,7 +419,7 @@ : \meta{B} ::= 0 \cdot \meta{B'} | 1 \cdot \meta{B'}\\ \end{plstx} -\noindent To follow the general scheme of the transfromation, +\noindent To follow the general scheme of the transformation, the $\alpha$ is either is either $0$ or $1$, and the $\beta$ happens to be empty. So we need to generate new rules for the form \mbox{$\meta{A} := \alpha\cdot\beta$}, which in our particular diff -r 46eee459a999 -r 66adcae6c762 handouts/ho06.pdf Binary file handouts/ho06.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 handouts/ho06.tex --- a/handouts/ho06.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/handouts/ho06.tex Fri Oct 13 15:07:37 2023 +0100 @@ -107,16 +107,20 @@ class we specify that \texttt{I} is the \emph{input type} of the parser combinator and that \texttt{T} is the \emph{output type}. This implies that the function \texttt{parse} takes an argument of type -\texttt{I} and returns a set of type \mbox{\texttt{Set[(T, I)]}}. +\texttt{I} and returns a set of type \mbox{\texttt{Set[(T, + I)]}}.\footnote{In the actual code on KEATS there is some + kerfuffle about correct type-bounds and using clauses. Ignore this + part of the implementation for the moment---it is not needed for + understanding how the code works.} \begin{center} \begin{lstlisting}[language=Scala] -abstract class Parser[I, T](using is: I => Seq[_]) { +abstract class Parser[I, T] { def parse(in: I): Set[(T, I)] def parse_all(in: I) : Set[T] = for ((hd, tl) <- parse(in); - if is(tl).isEmpty) yield hd + if tl.isEmpty) yield hd } \end{lstlisting} \end{center} @@ -214,8 +218,7 @@ \begin{lstlisting}[language=Scala, numbers=none] class AltParser[I, T] (p: => Parser[I, T], - q: => Parser[I, T])(using I => Seq[_]) - extends Parser[I, T] { + q: => Parser[I, T]) extends Parser[I, T] { def parse(in: I) = p.parse(in) ++ q.parse(in) } \end{lstlisting} @@ -306,8 +309,7 @@ \begin{lstlisting}[language=Scala,numbers=none] class SeqParser[I, T, S] (p: => Parser[I, T], - q: => Parser[I, S])(using I => Seq[_]) - extends Parser[I, (T, S)] { + q: => Parser[I, S]) extends Parser[I, (T, S)] { def parse(in: I) = for ((output1, u1) <- p.parse(in); (output2, u2) <- q.parse(u1)) @@ -483,7 +485,7 @@ \begin{lstlisting}[language=Scala,basicstyle=\small\ttfamily, numbers=none] class MapParser[I, T, S] (p: => Parser[I, T], - f: T => S)(using I => Seq[_]) extends Parser[I, S] { + f: T => S) extends Parser[I, S] { def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } @@ -593,7 +595,7 @@ \texttt{Int}. Now \texttt{parse} generates \texttt{Set((123,abc))}, but this time \texttt{123} is an \texttt{Int}. Think carefully what the input and output type of the parser is without the semantic action -adn what with the semantic action (the type of the function can +and what with the semantic action (the type of the function can already tell you). Let us come back to semantic actions when we are going to implement actual context-free grammars. diff -r 46eee459a999 -r 66adcae6c762 handouts/ho07.pdf Binary file handouts/ho07.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 handouts/ho07.tex --- a/handouts/ho07.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/handouts/ho07.tex Fri Oct 13 15:07:37 2023 +0100 @@ -12,7 +12,7 @@ \begin{document} -\fnote{\copyright{} Christian Urban, King's College London, 2017, 2018, 2019, 2020} +\fnote{\copyright{} Christian Urban, King's College London, 2017, 2018, 2019, 2020, 2023} \section*{Handout 7 (Compilation)} @@ -53,15 +53,16 @@ \noindent The input will be WHILE-programs; the output will be assembly files (with the file extension .j). Assembly files essentially contain -human-readable low-level code, meaning they are not just bits and bytes, -but rather something you can read and understand---with a bit of -practice of course. An \emph{assembler} will then translate the assembly -files into unreadable class- or binary-files the JVM or CPU can run. -Unfortunately, the Java ecosystem does not come with an assembler which -would be handy for our compiler-endeavour (unlike Microsoft's Common -Language Infrastructure for the .Net platform which has an assembler -out-of-the-box). As a substitute we shall use the 3rd-party programs -Jasmin and Krakatau +human-readable low-level code, meaning they are not just bits and +bytes, but rather something you can read and understand---with a bit +of practice of course. An \emph{assembler} will then translate the +assembly files into unreadable class- or binary-files the JVM or CPU +can run. Unfortunately, the Java ecosystem does not come with an +assembler which would be handy for our compiler-endeavour (unlike +Microsoft's Common Language Infrastructure for the .Net platform which +has an assembler out-of-the-box). As a substitute we shall use the +3rd-party programs Jasmin or Krakatau (Jasmin is the preferred +option---a \texttt{jasmin.jar}-file is available on KEATS): \begin{itemize} \item \url{http://jasmin.sourceforge.net} diff -r 46eee459a999 -r 66adcae6c762 hws/hw01.pdf Binary file hws/hw01.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw02.pdf Binary file hws/hw02.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw02.tex --- a/hws/hw02.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/hws/hw02.tex Fri Oct 13 15:07:37 2023 +0100 @@ -118,7 +118,7 @@ normally not written---for example the JSON format for numbers explicitly forbids this. So 007 is not a number according to JSON. - \solution{Just numbers without leading 0s: $0 + (1..9)\cdot(0..1)^*$; + \solution{Just numbers without leading 0s: $0 + (1..9)\cdot(0..9)^*$; can be extended to decimal; similar for binary numbers } diff -r 46eee459a999 -r 66adcae6c762 hws/hw03.pdf Binary file hws/hw03.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw03.tex --- a/hws/hw03.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/hws/hw03.tex Fri Oct 13 15:07:37 2023 +0100 @@ -69,7 +69,11 @@ What is the language recognised by this automaton? - + \solution{ + All strings consisting of 0 or more a's then 1 or more b's, + which is equivalent to the language of the regular + expression $a^* \cdot b \cdot b^*$. + } \item Give a non-deterministic finite automaton that can recognise the language $L(a\cdot (a + b)^* \cdot c)$. @@ -109,6 +113,8 @@ } + + \item Given the following deterministic finite automaton over the alphabet $\{a, b\}$, find an automaton that recognises the complement language. (Hint: Recall that @@ -178,6 +184,12 @@ \end{tikzpicture} \end{center} + \solution{ + The DFA has three states Q0,Q1,Q2 with Q0 starting state and Q2 accepting. + The transitions are (Q0,a)-> Q1 (Q0,b)->Q0 (Q1,a)->Q2 (Q1,b)->Q0 + (Q2,a)->Q2 (Q2,b)->Q0. + } + \item %%\textbf{(Deleted for 2017, 2018, 2019)} Given the following deterministic finite automaton over the alphabet $\{0, 1\}$, find the corresponding minimal automaton. In @@ -232,6 +244,10 @@ Arden's lemma which states that an equation of the form $q = q\cdot r + s$ has the unique solution $q = s \cdot r^*$.) + \solution{ + $(b + ab + aa(a^*)b)^* \cdot (1 + a)$ + } + \item If a non-deterministic finite automaton (NFA) has $n$ states. How many states does a deterministic automaton (DFA) that can recognise the same language diff -r 46eee459a999 -r 66adcae6c762 hws/hw04.pdf Binary file hws/hw04.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw05.pdf Binary file hws/hw05.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw06.pdf Binary file hws/hw06.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw07.pdf Binary file hws/hw07.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw08.pdf Binary file hws/hw08.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw08.tex --- a/hws/hw08.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/hws/hw08.tex Fri Oct 13 15:07:37 2023 +0100 @@ -19,6 +19,38 @@ \item What is the main difference between the Java assembler (as processed by Jasmin) and Java Byte Code? +\item Remember symbolic labels in the Jasmin-assembler are meant to + be used for jumps (like in loops or if-conditions). Assume + you generated a Jasmin-file with some redundant + labels, that is some labels are not used in your code for any jumps. For + example \texttt{L\_begin} and \texttt{L\_end} are not used + in the fol,lowing code-snippet: + +\begin{lstlisting}[language=JVMIS,numbers=none] +L_begin: +ldc 1 +ldc 2 +ldc 3 +imul +ldc 4 +ldc 3 +isub +iadd +iadd +L_end: +\end{lstlisting} + + Do these redundant labels affect the size of the generated + JVM-code? (Hint: What are the labels translated to by + the Jasmin-assembler?). + + \solution{The answer is no. The reason is that assemblers + calculate for labels either relative or explicit adresses, + which are then used in the JVM-byte-code. Relative addresses + are like ``jump 10 bytes forward'' or ``12 bytes backward''. So + additional labels do not increase the size of the generated code.} + + \item Consider the following Scala snippet. Are the two functions \texttt{is\_even} and \texttt{is\_odd} tail-recursive? diff -r 46eee459a999 -r 66adcae6c762 hws/hw09.pdf Binary file hws/hw09.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 hws/hw09.tex --- a/hws/hw09.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/hws/hw09.tex Fri Oct 13 15:07:37 2023 +0100 @@ -95,7 +95,7 @@ \item As an optimisation technique, a compiler might want to detect `dead code' and not generate anything for this code. Why does this optimisation technique have the - potential of speeding up the run-time of a program? (Hint: On what CPUs are programs + potential of speeding up the run-time of a program? (Hint: On what kind of CPUs are programs run nowadays?) \item In an earlier question, we analysed the advantages of having a lexer-phase @@ -115,13 +115,22 @@ \item What is the difference between a parse tree and an abstract syntax tree? Give some simple examples for each of them. - -\item Give a description of how the Brzozowski matcher works. - The description should be coherent and logical. +\item What are the two main features of code in + static single assignment form (SSA)? -\item Give a description of how a compiler for the While-language can - be implemented. You should assume you are producing code for the JVM. - The description should be coherent and logical. + \solution{ + Variables are only assigned once and all operations are + primitive (in the sense of simple arithmetic operations, + function calls and so on). + } + + +%\item Give a description of how the Brzozowski matcher works. +% The description should be coherent and logical. + +%\item Give a description of how a compiler for the While-language can +% be implemented. You should assume you are producing code for the JVM. +% The description should be coherent and logical. \item \POSTSCRIPT diff -r 46eee459a999 -r 66adcae6c762 progs/matcher/re1.sc --- a/progs/matcher/re1.sc Thu Oct 05 14:36:54 2023 +0100 +++ b/progs/matcher/re1.sc Fri Oct 13 15:07:37 2023 +0100 @@ -56,7 +56,7 @@ // some examples from the homework -val r = SEQ(CHAR('b'), CHAR('c')) +val r = SEQ(CHAR('a'), CHAR('c')) matcher(r, "ac") val r1 = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b'))) diff -r 46eee459a999 -r 66adcae6c762 progs/parser-combinators/comb1.sc --- a/progs/parser-combinators/comb1.sc Thu Oct 05 14:36:54 2023 +0100 +++ b/progs/parser-combinators/comb1.sc Fri Oct 13 15:07:37 2023 +0100 @@ -10,7 +10,9 @@ // using is: I => Seq[_], which means that the input // type 'I' needs to be a sequence. -abstract class Parser[I, T](using is: I => Seq[_]) { +type IsSeq[I] = I => Seq[_] + +abstract class Parser[I, T](using is: IsSeq[I]) { def parse(in: I): Set[(T, I)] def parse_all(in: I) : Set[T] = @@ -20,23 +22,25 @@ // parser combinators + + // alternative parser -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] { +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { def parse(in: I) = p.parse(in) ++ q.parse(in) } // sequence parser -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, (T, S)] { +class SeqParser[I: IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, (T, S)] { def parse(in: I) = for ((hd1, tl1) <- p.parse(in); (hd2, tl2) <- q.parse(tl1)) yield ((hd1, hd2), tl2) } // map parser -class MapParser[I, T, S](p: => Parser[I, T], - f: T => S)(using I => Seq[_]) extends Parser[I, S] { +class MapParser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } @@ -86,7 +90,7 @@ (p"else").parse("elsethen") // more convenient syntax for parser combinators -extension [I, T](p: Parser[I, T])(using I => Seq[_]) { +extension [I: IsSeq, T](p: Parser[I, T]) { def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) def map[S](f: => T => S) = new MapParser[I, T, S](p, f) diff -r 46eee459a999 -r 66adcae6c762 progs/parser-combinators/comb2.sc --- a/progs/parser-combinators/comb2.sc Thu Oct 05 14:36:54 2023 +0100 +++ b/progs/parser-combinators/comb2.sc Fri Oct 13 15:07:37 2023 +0100 @@ -18,6 +18,8 @@ case class ~[+A, +B](x: A, y: B) +type IsSeq[I] = I => Seq[_] + abstract class Parser[I, T](using is: I => Seq[_]) { def parse(in: I): Set[(T, I)] @@ -29,22 +31,22 @@ // parser combinators // alternative parser -class AltParser[I, T](p: => Parser[I, T], - q: => Parser[I, T])(using I => Seq[_]) extends Parser[I, T] { +class AltParser[I : IsSeq, T](p: => Parser[I, T], + q: => Parser[I, T]) extends Parser[I, T] { def parse(in: I) = p.parse(in) ++ q.parse(in) } // sequence parser -class SeqParser[I, T, S](p: => Parser[I, T], - q: => Parser[I, S])(using I => Seq[_]) extends Parser[I, ~[T, S]] { +class SeqParser[I : IsSeq, T, S](p: => Parser[I, T], + q: => Parser[I, S]) extends Parser[I, ~[T, S]] { def parse(in: I) = for ((hd1, tl1) <- p.parse(in); (hd2, tl2) <- q.parse(tl1)) yield (new ~(hd1, hd2), tl2) } // map parser -class maparser[I, T, S](p: => Parser[I, T], - f: T => S)(using I => Seq[_]) extends Parser[I, S] { +class maparser[I : IsSeq, T, S](p: => Parser[I, T], + f: T => S) extends Parser[I, S] { def parse(in: I) = for ((hd, tl) <- p.parse(in)) yield (f(hd), tl) } @@ -90,7 +92,7 @@ // more convenient syntax for parser combinators -extension [I, T](p: Parser[I, T])(using I => Seq[_]) { +extension [I : IsSeq, T](p: Parser[I, T]) { def ||(q : => Parser[I, T]) = new AltParser[I, T](p, q) def ~[S] (q : => Parser[I, S]) = new SeqParser[I, T, S](p, q) def map[S](f: => T => S) = new maparser[I, T, S](p, f) diff -r 46eee459a999 -r 66adcae6c762 slides/slides02.pdf Binary file slides/slides02.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 slides/slides02.tex --- a/slides/slides02.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/slides/slides02.tex Fri Oct 13 15:07:37 2023 +0100 @@ -411,6 +411,13 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +{ +\setbeamercolor{background canvas}{bg=cream} +\begin{frame}<1-6>[c] +\end{frame} +} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{Examples} diff -r 46eee459a999 -r 66adcae6c762 slides/slides03.pdf Binary file slides/slides03.pdf has changed diff -r 46eee459a999 -r 66adcae6c762 slides/slides03.tex --- a/slides/slides03.tex Thu Oct 05 14:36:54 2023 +0100 +++ b/slides/slides03.tex Fri Oct 13 15:07:37 2023 +0100 @@ -61,6 +61,15 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +{ +\setbeamercolor{background canvas}{bg=cream} +\begin{frame}<1-10>[c] +\end{frame} +} + + + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %\begin{frame}[c] %\frametitle{Scala Book, Exams} @@ -1442,6 +1451,13 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +{ +\setbeamercolor{background canvas}{bg=cream} +\begin{frame}<1-10>[c] +\end{frame} +} + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] \frametitle{Regular Languages (3)}