# HG changeset patch # User Christian Urban # Date 1636130875 0 # Node ID 3ffe978a5664841c5729eed575ee893f04edab00 # Parent 017f621f5835dbef9b2a62ef7b1b0df1cbd556f1 updated diff -r 017f621f5835 -r 3ffe978a5664 README --- a/README Thu Nov 04 12:20:12 2021 +0000 +++ b/README Fri Nov 05 16:47:55 2021 +0000 @@ -1,3 +1,10 @@ +TA meeting + +https://web.microsoftstream.com/video/cec0d34d-77f5-4940-9bad-628416d4c521 + +============= + + Wartremover ./wartremover -traverser org.wartremover.warts.Return -traverser org.wartremover.warts.Var -traverser org.wartremover.warts.MutableDataStructures $i diff -r 017f621f5835 -r 3ffe978a5664 core_templates1/collatz.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_templates1/collatz.scala Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,45 @@ +// Core Part 1 about the 3n+1 conjecture +//============================================ + +object C1 { + +//(1) Complete the collatz function below. It should +// recursively calculate the number of steps needed +// until the collatz series reaches the number 1. +// If needed, you can use an auxiliary function that +// performs the recursion. The function should expect +// arguments in the range of 1 to 1 Million. + +def collatz(n: Long) : Long = ??? + + +//(2) Complete the collatz_max function below. It should +// calculate how many steps are needed for each number +// from 1 up to a bound and then calculate the maximum number of +// steps and the corresponding number that needs that many +// steps. Again, you should expect bounds in the range of 1 +// up to 1 Million. The first component of the pair is +// the maximum number of steps and the second is the +// corresponding number. + +def collatz_max(bnd: Long) : (Long, Long) = ??? + +//(3) Implement a function that calculates the last_odd +// number in a collatz series. For this implement an +// is_pow_of_two function which tests whether a number +// is a power of two. The function is_hard calculates +// whether 3n + 1 is a power of two. Again you can +// assume the input ranges between 1 and 1 Million, +// and also assume that the input of last_odd will not +// be a power of 2. + +def is_pow_of_two(n: Long) : Boolean = ??? + +def is_hard(n: Long) : Boolean = ??? + +def last_odd(n: Long) : Long = ??? + +} + + + diff -r 017f621f5835 -r 3ffe978a5664 core_templates2/docdiff.jar Binary file core_templates2/docdiff.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 core_templates2/docdiff.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_templates2/docdiff.scala Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,115 @@ +// Core Part 2 about Code Similarity +//=================================== + + +object C2 { + + +//(1) Complete the clean function below. It should find +// all words in a string using the regular expression +// \w+ and the library function +// +// some_regex.findAllIn(some_string) +// +// The words should be Returned as a list of strings. + + +def clean(s: String) : List[String] = ??? + + + +//(2) The function occurrences calculates the number of times +// strings occur in a list of strings. These occurrences should +// be calculated as a Map from strings to integers. + + +def occurrences(xs: List[String]): Map[String, Int] = ??? + + +//(3) This functions calculates the dot-product of two documents +// (list of strings). For this it calculates the occurrence +// maps from (2) and then multiplies the corresponding occurrences. +// If a string does not occur in a document, the product is zero. +// The function finally sums up all products. + + +def prod(lst1: List[String], lst2: List[String]) : Int = ??? + + +//(4) Complete the functions overlap and similarity. The overlap of +// two documents is calculated by the formula given in the assignment +// description. The similarity of two strings is given by the overlap +// of the cleaned strings (see (1)). + + +def overlap(lst1: List[String], lst2: List[String]) : Double = ??? + +def similarity(s1: String, s2: String) : Double = ??? + + + +/* Test cases + + +val list1 = List("a", "b", "b", "c", "d") +val list2 = List("d", "b", "d", "b", "d") + +occurrences(List("a", "b", "b", "c", "d")) // Map(a -> 1, b -> 2, c -> 1, d -> 1) +occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2) + +prod(list1,list2) // 7 + +overlap(list1, list2) // 0.5384615384615384 +overlap(list2, list1) // 0.5384615384615384 +overlap(list1, list1) // 1.0 +overlap(list2, list2) // 1.0 + +// Plagiarism examples from +// https://desales.libguides.com/avoidingplagiarism/examples + +val orig1 = """There is a strong market demand for eco-tourism in +Australia. Its rich and diverse natural heritage ensures Australia's +capacity to attract international ecotourists and gives Australia a +comparative advantage in the highly competitive tourism industry.""" + +val plag1 = """There is a high market demand for eco-tourism in +Australia. Australia has a comparative advantage in the highly +competitive tourism industry due to its rich and varied natural +heritage which ensures Australia's capacity to attract international +ecotourists.""" + +similarity(orig1, plag1) // 0.8679245283018868 + + +// Plagiarism examples from +// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php + +val orig2 = """No oil spill is entirely benign. Depending on timing and +location, even a relatively minor spill can cause significant harm to +individual organisms and entire populations. Oil spills can cause +impacts over a range of time scales, from days to years, or even +decades for certain spills. Impacts are typically divided into acute +(short-term) and chronic (long-term) effects. Both types are part of a +complicated and often controversial equation that is addressed after +an oil spill: ecosystem recovery.""" + +val plag2 = """There is no such thing as a "good" oil spill. If the +time and place are just right, even a small oil spill can cause damage +to sensitive ecosystems. Further, spills can cause harm days, months, +years, or even decades after they occur. Because of this, spills are +usually broken into short-term (acute) and long-term (chronic) +effects. Both of these types of harm must be addressed in ecosystem +recovery: a controversial tactic that is often implemented immediately +following an oil spill.""" + +overlap(clean(orig2), clean(plag2)) // 0.728 +similarity(orig2, plag2) // 0.728 + + + +// The punchline: everything above 0.6 looks suspicious and +// should be investigated by staff. + +*/ + +} diff -r 017f621f5835 -r 3ffe978a5664 core_templates3/postfix.jar Binary file core_templates3/postfix.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 core_templates3/postfix.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_templates3/postfix.scala Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,81 @@ +// Shunting Yard Algorithm +// by Edsger Dijkstra +// ======================== + +object C3a { + +// type of tokens +type Toks = List[String] + +// the operations in the basic version of the algorithm +val ops = List("+", "-", "*", "/") + +// the precedences of the operators +val precs = Map("+" -> 1, + "-" -> 1, + "*" -> 2, + "/" -> 2) + +// helper function for splitting strings into tokens +def split(s: String) : Toks = s.split(" ").toList + + +// (1) Implement below the shunting yard algorithm. The most +// convenient way to this in Scala is to implement a recursive +// function and to heavily use pattern matching. The function syard +// takes some input tokens as first argument. The second and third +// arguments represent the stack and the output of the shunting yard +// algorithm. +// +// In the marking, you can assume the function is called only with +// an empty stack and an empty output list. You can also assume the +// input os only properly formatted (infix) arithmetic expressions +// (all parentheses will be well-nested, the input only contains +// operators and numbers). + +// You can implement any additional helper function you need. I found +// it helpful to implement two auxiliary functions for the pattern matching: +// + +def is_op(op: String) : Boolean = ??? +def prec(op1: String, op2: String) : Boolean = ??? + + +def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? + + +// test cases +//syard(split("3 + 4 * ( 2 - 1 )")) // 3 4 2 1 - * + +//syard(split("10 + 12 * 33")) // 10 12 33 * + +//syard(split("( 5 + 7 ) * 2")) // 5 7 + 2 * +//syard(split("5 + 7 / 2")) // 5 7 2 / + +//syard(split("5 * 7 / 2")) // 5 7 * 2 / +//syard(split("9 + 24 / ( 7 - 3 )")) // 9 24 7 3 - / + + +//syard(split("3 + 4 + 5")) // 3 4 + 5 + +//syard(split("( ( 3 + 4 ) + 5 )")) // 3 4 + 5 + +//syard(split("( 3 + ( 4 + 5 ) )")) // 3 4 5 + + +//syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + + + + +// (2) Implement a compute function that evaluates an input list +// in postfix notation. This function takes a list of tokens +// and a stack as argumenta. The function should produce the +// result as an integer using the stack. You can assume +// this function will be only called with proper postfix +// expressions. + +def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? + + +// test cases +// compute(syard(split("3 + 4 * ( 2 - 1 )"))) // 7 +// compute(syard(split("10 + 12 * 33"))) // 406 +// compute(syard(split("( 5 + 7 ) * 2"))) // 24 +// compute(syard(split("5 + 7 / 2"))) // 8 +// compute(syard(split("5 * 7 / 2"))) // 17 +// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15 + +} + + diff -r 017f621f5835 -r 3ffe978a5664 core_templates3/postfix2.jar Binary file core_templates3/postfix2.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 core_templates3/postfix2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core_templates3/postfix2.scala Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,69 @@ +// Shunting Yard Algorithm +// including Associativity for Operators +// ===================================== + +object C3b { + + +// type of tokens +type Toks = List[String] + +// helper function for splitting strings into tokens +def split(s: String) : Toks = s.split(" ").toList + +// left- and right-associativity +abstract class Assoc +case object LA extends Assoc +case object RA extends Assoc + + +// power is right-associative, +// everything else is left-associative +def assoc(s: String) : Assoc = s match { + case "^" => RA + case _ => LA +} + + +// the precedences of the operators +val precs = Map("+" -> 1, + "-" -> 1, + "*" -> 2, + "/" -> 2, + "^" -> 4) + +// the operations in the basic version of the algorithm +val ops = List("+", "-", "*", "/", "^") + +// (3) Implement the extended version of the shunting yard algorithm. +// This version should properly account for the fact that the power +// operation is right-associative. Apart from the extension to include +// the power operation, you can make the same assumptions as in +// basic version. + +def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? + + +// test cases +// syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3")) // 3 4 8 * 5 1 - 2 3 ^ ^ / + + + +// (4) Implement a compute function that produces an Int for an +// input list of tokens in postfix notation. + +def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? + + +// test cases +// compute(syard(split("3 + 4 * ( 2 - 1 )"))) // 7 +// compute(syard(split("10 + 12 * 33"))) // 406 +// compute(syard(split("( 5 + 7 ) * 2"))) // 24 +// compute(syard(split("5 + 7 / 2"))) // 8 +// compute(syard(split("5 * 7 / 2"))) // 17 +// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15 +// compute(syard(split("4 ^ 3 ^ 2"))) // 262144 +// compute(syard(split("4 ^ ( 3 ^ 2 )"))) // 262144 +// compute(syard(split("( 4 ^ 3 ) ^ 2"))) // 4096 +// compute(syard(split("( 3 + 1 ) ^ 2 ^ 3"))) // 65536 + +} diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw01.pdf Binary file cws/core_cw01.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/core_cw01.tex Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,299 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{disclaimer} +\usepackage{../langs} + + + +\begin{document} + +\section*{Core Part 1 (Scala, 3 Marks)} + +\mbox{}\hfill\textit{``The most effective debugging tool is still careful thought,}\\ +\mbox{}\hfill\textit{coupled with judiciously placed print statements.''}\smallskip\\ +\mbox{}\hfill\textit{ --- Brian W. Kernighan, in Unix for Beginners (1979)}\bigskip + + +\IMPORTANT{This part is about Scala. It is due on \cwSIX{} at 5pm and worth 3\%. +Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.} + +\noindent +Also note that the running time of each part will be restricted to a +maximum of 30 seconds on my laptop. + +\DISCLAIMER{} + +\subsection*{Reference Implementation} + +Like the C++ assignments, the Scala assignments will work like this: you +push your files to GitHub and receive (after sometimes a long delay) some +automated feedback. In the end we take a snapshot of the submitted files and +apply an automated marking script to them.\medskip + +\noindent +In addition, the Scala coursework comes with a reference implementation +in form of \texttt{jar}-files. This allows you to run any test cases on +your own computer. For example you can call Scala on the command line +with the option \texttt{-cp collatz.jar} and then query any function +from the template file. Say you want to find out what the functions +\texttt{collatz} and \texttt{collatz\_max} produce: for this you just +need to prefix them with the object name \texttt{C1}. If you want to +find out what these functions produce for the argument \texttt{6}, you +would type something like: + +\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] +$ scala -cp collatz.jar + +scala> C1.collatz(6) +... +scala> C1.collatz_max(6) +... +\end{lstlisting}%$ + +\subsection*{Hints} + +\noindent +\textbf{For the Core Part 1:} useful math operators: \texttt{\%} for modulo, \texttt{\&} for bit-wise and; useful +functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt}, +\texttt{.toList} for conversions, you can use \texttt{List(...).max} for the +maximum of a list, \texttt{List(...).indexOf(...)} for the first index of +a value in a list.\bigskip + + + +\newpage +\subsection*{Core Part 1 (3 Marks, file collatz.scala)} + +This part is about function definitions and recursion. You are asked +to implement a Scala program that tests examples of the +\emph{$3n + 1$-conjecture}, also called \emph{Collatz + conjecture}.\video{https://www.youtube.com./watch?v=LqKpkdRRLZw} +This conjecture can be described as follows: Start with any positive +number $n$ greater than $0$: + +\begin{itemize} +\item If $n$ is even, divide it by $2$ to obtain $n / 2$. +\item If $n$ is odd, multiply it by $3$ and add $1$ to obtain $3n + + 1$. +\item Repeat this process and you will always end up with $1$. +\end{itemize} + +\noindent +For example if you start with, say, $6$ and $9$, you obtain the +two \emph{Collatz series} +% +\[ +\begin{array}{@{}l@{\hspace{5mm}}l@{}} +6, 3, 10, 5, 16, 8, 4, 2, 1 & \text{(= 8 steps)}\\ +9, 28, 14, 7, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1 & \text{(= 19 steps)}\\ +\end{array} +\] + +\noindent +As you can see, the numbers go up and down like a roller-coaster, but +curiously they seem to always terminate in $1$. Nobody knows why. The +conjecture is that this will \emph{always} happen for every number +greater than 0.\footnote{While it is relatively easy to test this +conjecture with particular numbers, it is an interesting open problem to +\emph{prove} that the conjecture is true for \emph{all} numbers ($> 0$). +Paul Erd\"o{}s, a famous mathematician you might have heard about, said +about this conjecture: ``Mathematics may not [yet] be ready for such +problems.'' and also offered a \$500 cash prize for its solution. +Jeffrey Lagarias, another mathematician, claimed that based only on +known information about this problem, ``this is an extraordinarily +difficult problem, completely out of reach of present day mathematics.'' +There is also a \href{https://xkcd.com/710/}{xkcd} cartoon about this +conjecture\here{https://xkcd.com/710/}). If you are able to solve this +conjecture, you will definitely get famous.}\bigskip + +\noindent +\textbf{Tasks} + +\begin{itemize} +\item[(1)] You are asked to implement a recursive function that + calculates the number of steps needed until a series ends + with $1$. In case of starting with $6$, it takes $8$ steps and in + case of starting with $9$, it takes $19$ (see above). We assume it + takes $0$ steps, if we start with $1$. In order to + try out this function with large numbers, you should use + \texttt{Long} as argument type, instead of \texttt{Int}. You can + assume this function will be called with numbers between $1$ and + $1$ Million. \hfill[1 Mark] + +\item[(2)] Write a second function that takes an upper bound as + an argument and calculates the steps for all numbers in the range from + 1 up to this bound (the bound including). It returns the maximum number of + steps and the corresponding number that needs that many steps. More + precisely it returns a pair where the first component is the number + of steps and the second is the corresponding number. \hfill\mbox{[1 + Mark]} + +\item[(3)] Write a function that calculates \emph{hard + numbers} \here{https://medium.com/cantors-paradise/the-collatz-conjecture-some-shocking-results-from-180-000-iterations-7fea130d0377} + in the Collatz series---these are the last odd numbers just before a + power of two is reached. For this, implement an + \textit{is-power-of-two} function which tests whether a number is a + power of two. The easiest way to implement this is by using the + bit-operator $\&$ of Scala. For a power of two, say $n$ with $n > 0$, it + holds that $n \;\&\; (n - 1)$ is equal to zero. I let you think why + this is the case. + + The function \textit{is-hard} calculates whether + $3n + 1$ is a power of two. Finally the \textit{last-odd} function + calculates the last odd number before a power of 2 in the Collatz + series. This means for example when starting with 9, + we receive 5 as the last odd number. Surprisingly a lot of numbers + have 5 as last-odd number. But for example for 113 we obtain 85, + because of the series + % + \[113, 340, 170, \,\fbox{85}\,, 256, 128, 64, 32, 16, 8, 4, 2, 1\] + + The \textit{last-odd} function will only be called with numbers that are not + powers of 2 themselves.\hfill\mbox{[1 Mark]} +\end{itemize} + +\noindent +\textbf{Test Data:} Some test ranges and cases are: + +\begin{itemize} +\item 1 to 10 where $9$ takes 19 steps +\item 1 to 100 where $97$ takes 118 steps, +\item 1 to 1,000 where $871$ takes 178 steps, +\item 1 to 10,000 where $6,171$ takes 261 steps, +\item 1 to 100,000 where $77,031$ takes 350 steps, +\item 1 to 1 Million where $837,799$ takes 524 steps + %% runs out of stack space + %% \item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 685 steps +\item 21 is the last odd number for 84 +\item 341 is the last odd number for 201, 604, 605 and 8600 + +\end{itemize} + + + + + + +\end{document} + + +%%%%%%% Historical Stuff +\newpage + +This part is about web-scraping and list-processing in Scala. It uses +online data about the per-capita alcohol consumption for each country +(per year?), and a file containing the data about the population size of +each country. From this data you are supposed to estimate how many +litres of pure alcohol are consumed worldwide.\bigskip + +\noindent +\textbf{Tasks (file alcohol.scala):} + +\begin{itemize} +\item[(1)] Write a function that given an URL requests a + comma-separated value (CSV) list. We are interested in the list + from the following URL + +\begin{center} + \url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv} +\end{center} + +\noindent Your function should take a string (the URL) as input, and +produce a list of strings as output, where each string is one line in +the corresponding CSV-list. This list from the URL above should +contain 194 lines.\medskip + +\noindent +Write another function that can read the file \texttt{population.csv} +from disk (the file is distributed with the assignment). This +function should take a string as argument, the file name, and again +return a list of strings corresponding to each entry in the +CSV-list. For \texttt{population.csv}, this list should contain 216 +lines.\hfill[1 Mark] + + +\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we + need to extract the data that interests us. From the header of the + alcohol list, you can see there are 5 columns + + \begin{center} + \begin{tabular}{l} + \texttt{country (name),}\\ + \texttt{beer\_servings,}\\ + \texttt{spirit\_servings,}\\ + \texttt{wine\_servings,}\\ + \texttt{total\_litres\_of\_pure\_alcohol} + \end{tabular} + \end{center} + + \noindent + Write a function that extracts the data from the first column, + the country name, and the data from the fifth column (converted into + a \texttt{Double}). For this go through each line of the CSV-list + (except the first line), use the \texttt{split(",")} function to + divide each line into an array of 5 elements. Keep the data from the + first and fifth element in these arrays.\medskip + + \noindent + Write another function that processes the population size list. This + is already of the form country name and population size.\footnote{Your + friendly lecturer already did the messy processing for you from the + Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the + strings according to the commas. However, this time generate a + \texttt{Map} from country names to population sizes.\hfill[1 Mark] + +\item[(3)] In (2) you generated the data about the alcohol consumption + per-capita for each country, and also the population size for each + country. From this generate next a sorted(!) list of the overall + alcohol consumption for each country. The list should be sorted from + highest alcohol consumption to lowest. The difficulty is that the + data is scraped off from ``random'' sources on the Internet and + annoyingly the spelling of some country names does not always agree in both + lists. For example the alcohol list contains + \texttt{Bosnia-Herzegovina}, while the population writes this country as + \texttt{Bosnia and Herzegovina}. In your sorted + overall list include only countries from the alcohol list, whose + exact country name is also in the population size list. This means + you can ignore countries like Bosnia-Herzegovina from the overall + alcohol consumption. There are 177 countries where the names + agree. The UK is ranked 10th on this list by + consuming 671,976,864 Litres of pure alcohol each year.\medskip + + \noindent + Finally, write another function that takes an integer, say + \texttt{n}, as argument. You can assume this integer is between 0 + and 177 (the number of countries in the sorted list above). The + function should return a triple, where the first component is the + sum of the alcohol consumption in all countries (on the list); the + second component is the sum of the \texttt{n}-highest alcohol + consumers on the list; and the third component is the percentage the + \texttt{n}-highest alcohol consumers drink with respect to the + the world consumption. You will see that according to our data, 164 + countries (out of 177) gobble up 100\% of the World alcohol + consumption.\hfill\mbox{[1 Mark]} +\end{itemize} + +\noindent +\textbf{Hints:} useful list functions: \texttt{.drop(n)}, +\texttt{.take(n)} for dropping or taking some elements in a list, +\texttt{.getLines} for separating lines in a string; +\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second +elements in the pairs---the sorting is done from smallest to highest; +useful \texttt{Map} functions: \texttt{.toMap} converts a list of +pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the +map is defined at that key, that is would produce a result when +called with this key; useful data functions: \texttt{Source.fromURL}, +\texttt{Source.fromFile} for obtaining a webpage and reading a file. + +\newpage + + + + + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw02.pdf Binary file cws/core_cw02.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/core_cw02.tex Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,161 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{disclaimer} +\usepackage{../langs} + +\begin{document} + + +%% should ask to lower case the words. + +\section*{Core Part 2 (Scala, 3 Marks)} + +\mbox{}\hfill\textit{``What one programmer can do in one month,}\\ +\mbox{}\hfill\textit{two programmers can do in two months.''}\smallskip\\ +\mbox{}\hfill\textit{ --- Frederick P.~Brooks (author of The Mythical Man-Month)}\bigskip\medskip + +\IMPORTANT{You are asked to implement a Scala program for measuring similarity in + texts. The preliminary part is due on \cwSEVEN{} at 5pm and worth 3\%. + Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.} + +\noindent +Also note that the running time of each part will be restricted to a +maximum of 30 seconds on my laptop. + +\DISCLAIMER{} + + +\subsection*{Reference Implementation} + +Like the C++ part, the Scala part works like this: you +push your files to GitHub and receive (after sometimes a long delay) some +automated feedback. In the end we will take a snapshot of the submitted files and +apply an automated marking script to them.\medskip + +\noindent +In addition, the Scala part comes with reference +implementations in form of \texttt{jar}-files. This allows you to run +any test cases on your own computer. For example you can call Scala on +the command line with the option \texttt{-cp docdiff.jar} and then +query any function from the template file. Say you want to find out +what the function \texttt{occurrences} produces: for this you just need +to prefix it with the object name \texttt{C2}. If you want to find out what +these functions produce for the list \texttt{List("a", "b", "b")}, +you would type something like: + +\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] +$ scala -cp docdiff.jar + +scala> C2.occurrences(List("a", "b", "b")) +... +\end{lstlisting}%$ + +\subsection*{Hints} + +\noindent +\textbf{For the Core Part 2:} useful operations involving regular +expressions: +\[\texttt{reg.findAllIn(s).toList}\] +\noindent finds all +substrings in \texttt{s} according to a regular regular expression +\texttt{reg}; useful list operations: \texttt{.distinct} +removing duplicates from a list, \texttt{.count} counts the number of +elements in a list that satisfy some condition, \texttt{.toMap} +transfers a list of pairs into a Map, \texttt{.sum} adds up a list of +integers, \texttt{.max} calculates the maximum of a list.\bigskip + + + +\newpage +\subsection*{Core Part 2 (3 Marks, file docdiff.scala)} + +It seems plagiarism---stealing and submitting someone +else's code---is a serious problem at other +universities.\footnote{Surely, King's students, after all their + instructions and warnings, would never commit such an offence. Yes?} +Detecting such plagiarism is time-consuming and disheartening for +lecturers at those universities. To aid these poor souls, let's +implement in this part a program that determines the similarity +between two documents (be they source code or texts in English). A +document will be represented as a list of strings. + + +\subsection*{Tasks} + +\begin{itemize} +\item[(1)] Implement a function that `cleans' a string by finding all + (proper) words in the string. For this use the regular expression + \texttt{\textbackslash{}w+} for recognising words and the library function + \texttt{findAllIn}. The function should return a document (a list of + strings). + \mbox{}\hfill\mbox{[0.5 Marks]} + +\item[(2)] In order to compute the overlap between two documents, we + associate each document with a \texttt{Map}. This Map represents the + strings in a document and how many times these strings occur in the + document. A simple (though slightly inefficient) method for counting + the number of string-occurrences in a document is as follows: remove + all duplicates from the document; for each of these (unique) + strings, count how many times they occur in the original document. + Return a Map associating strings with occurrences. For example + + \begin{center} + \pcode{occurrences(List("a", "b", "b", "c", "d"))} + \end{center} + + produces \pcode{Map(a -> 1, b -> 2, c -> 1, d -> 1)} and + + \begin{center} + \pcode{occurrences(List("d", "b", "d", "b", "d"))} + \end{center} + + produces \pcode{Map(d -> 3, b -> 2)}.\hfill[1 Mark] + +\item[(3)] You can think of the Maps calculated under (2) as memory-efficient + representations of sparse ``vectors''. In this subtask you need to + implement the \emph{product} of two such vectors, sometimes also called + \emph{dot product} of two vectors.\footnote{\url{https://en.wikipedia.org/wiki/Dot_product}} + + For this dot product, implement a function that takes two documents + (\texttt{List[String]}) as arguments. The function first calculates + the (unique) strings in both. For each string, it multiplies the + corresponding occurrences in each document. If a string does not + occur in one of the documents, then the product for this string is zero. At the end + you need to add up all products. For the two documents in (2) the dot + product is 7, because + + \[ + \underbrace{1 * 0}_{"a"} \;\;+\;\; + \underbrace{2 * 2}_{"b"} \;\;+\;\; + \underbrace{1 * 0}_{"c"} \;\;+\;\; + \underbrace{1 * 3}_{"d"} \qquad = 7 + \] + + \hfill\mbox{[1 Mark]} + +\item[(4)] Implement first a function that calculates the overlap + between two documents, say $d_1$ and $d_2$, according to the formula + + \[ + \texttt{overlap}(d_1, d_2) = \frac{d_1 \cdot d_2}{max(d_1^2, d_2^2)} + \] + + where $d_1^2$ means $d_1 \cdot d_1$ and so on. + You can expect this function to return a \texttt{Double} between 0 and 1. The + overlap between the lists in (2) is $0.5384615384615384$. + + Second, implement a function that calculates the similarity of + two strings, by first extracting the substrings using the clean + function from (1) + and then calculating the overlap of the resulting documents.\\ + \mbox{}\hfill\mbox{[0.5 Marks]} +\end{itemize} + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw03.pdf Binary file cws/core_cw03.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/core_cw03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/core_cw03.tex Fri Nov 05 16:47:55 2021 +0000 @@ -0,0 +1,181 @@ +% !TEX program = xelatex +\documentclass{article} +\usepackage{../style} +\usepackage{../langs} +\usepackage{disclaimer} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{pgfplots} +\usepackage{stackengine} +%% \usepackage{accents} +\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} +\usepackage{ulem} + +\begin{document} + +% BF IDE +% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5 + +\section*{Core Part 3 (Scala, 3 Marks)} + +\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\ +\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\ +\mbox{}\hfill\textit{other functional languages.''}\smallskip\\ +\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google} +\bigskip + +\IMPORTANT{This part is about the shunting yard algorithm by Dijkstra. + The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December} + at 5pm and worth 3\%. + Any 1\% you achieve in the preliminary part counts as your + ``weekly engagement''.} + +\noindent +Also note that the running time of each part will be restricted to a +maximum of 30 seconds on my laptop. + +\DISCLAIMER{} + +\subsection*{Reference Implementation} + +This Scala assignment comes with two reference implementations in +form of \texttt{jar}-files. This allows +you to run any test cases on your own computer. For example you can +call Scala on the command line with the option \texttt{-cp + postfix.jar} and then query any function from the +\texttt{postfix.scala} file (similarly for file \texttt{postfix2.scala}). As +usual you have to prefix the calls with \texttt{C3a} and +\texttt{C3b}, respectively. + + +\begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small] +$ scala -cp postfix.jar + +scala> C3a.syard(C3a.split("( 5 + 7 ) * 2")) +val res0: C3a.Toks = List(5, 7, +, 2, *) +\end{lstlisting}%$ + + +\subsection*{Hints} + +\noindent +\textbf{For Core Part 3:} useful operations for determining +whether a string is a number are \texttt{.forall} and \texttt{.isDigit}. +One way to calculate the the power operation is to use \texttt{.pow} +on \texttt{BigInt}s, like \texttt{BigInt(n).pow(m).toInt}. +\bigskip + + +\subsection*{Core Part (3 Marks, files postfix.scala, postfix2.scala)} + +The \emph{Shunting Yard Algorithm} has been developed by Edsger Dijkstra, +an influential computer scientist who developed many well-known +algorithms. This algorithm transforms the usual infix notation of +arithmetic expressions into the postfix notation, sometimes also +called reverse Polish notation. + +Why on Earth do people use the postfix notation? It is much more +convenient to work with the usual infix notation for arithmetic +expressions. Most modern pocket calculators (as opposed to the ones used 20 +years ago) understand infix notation. So why on Earth? \ldots{}Well, +many computers under the hood, even nowadays, use postfix notation +extensively. For example if you give to the Java compiler the +expression $1 + ((2 * 3) + (4 - 3))$, it will generate the Java Byte +code + +\begin{lstlisting}[language=JVMIS,numbers=none] +ldc 1 +ldc 2 +ldc 3 +imul +ldc 4 +ldc 3 +isub +iadd +iadd +\end{lstlisting} + +\noindent +where the command \texttt{ldc} loads a constant onto the stack, and \texttt{imul}, +\texttt{isub} and \texttt{iadd} are commands acting on the stack. Clearly this +is the arithmetic expression in postfix notation.\bigskip + +\noindent +The shunting yard algorithm processes an input token list using an +operator stack and an output list. The input consists of numbers, +operators ($+$, $-$, $*$, $/$) and parentheses, and for the purpose of +the assignment we assume the input is always a well-formed expression +in infix notation. The calculation in the shunting yard algorithm uses +information about the +precedences of the operators (given in the template file). The +algorithm processes the input token list as follows: + +\begin{itemize} +\item If there is a number as input token, then this token is + transferred directly to the output list. Then the rest of the input is + processed. +\item If there is an operator as input token, then you need to check + what is on top of the operator stack. If there are operators with + a higher or equal precedence, these operators are first popped off + from the stack and moved to the output list. Then the operator from the input + is pushed onto the stack and the rest of the input is processed. +\item If the input is a left-parenthesis, you push it on to the stack + and continue processing the input. +\item If the input is a right-parenthesis, then you pop off all operators + from the stack to the output list until you reach the left-parenthesis. + Then you discharge the $($ and $)$ from the input and stack, and continue + processing the input list. +\item If the input is empty, then you move all remaining operators + from the stack to the output list. +\end{itemize} + +\subsubsection*{Tasks (file postfix.scala)} + +\begin{itemize} +\item[(1)] Implement the shunting yard algorithm described above. The + function, called \texttt{syard}, takes a list of tokens as first + argument. The second and third arguments are the stack and output + list represented as Scala lists. The most convenient way to + implement this algorithm is to analyse what the input list, stack + and output list look like in each step using pattern-matching. The + algorithm transforms for example the input + + \[ + \texttt{List(3, +, 4, *, (, 2, -, 1, ))} + \] + + into the postfix output + + \[ + \texttt{List(3, 4, 2, 1, -, *, +)} + \] + + You can assume the input list is always a list representing + a well-formed infix arithmetic expression.\hfill[1 Mark] + +\item[(2)] Implement a compute function that takes a postfix expression + as argument and evaluates it generating an integer as result. It uses a + stack to evaluate the postfix expression. The operators $+$, $-$, $*$ + are as usual; $/$ is division on integers, for example $7 / 3 = 2$. + \hfill[1 Mark] +\end{itemize} + +\subsubsection*{Task (file postfix2.scala)} + +\begin{itemize} +\item[(3/4)] Extend the code in (1) and (2) to include the power + operator. This requires proper account of associativity of + the operators. The power operator is right-associative, whereas the + other operators are left-associative. Left-associative operators + are popped off if the precedence is bigger or equal, while + right-associative operators are only popped off if the precedence is + bigger.\hfill[1 Marks] +\end{itemize} + +\end{document} + + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff -r 017f621f5835 -r 3ffe978a5664 cws/main_cw01.pdf Binary file cws/main_cw01.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/main_cw01.tex --- a/cws/main_cw01.tex Thu Nov 04 12:20:12 2021 +0000 +++ b/cws/main_cw01.tex Fri Nov 05 16:47:55 2021 +0000 @@ -33,19 +33,19 @@ function from the template file. Say you want to find out what the function \code{get_january_data} produces: for this you just need to prefix them with the object name -\texttt{CW6b} and call them with some arguments: +\texttt{M1} and call them with some arguments: \begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] $ scala -cp drumb.jar -scala> CW6b.get_january_data("FB", 2014) +scala> M1.get_january_data("FB", 2014) val res2: List[String] = List(2014-01-02,54.709999,....) \end{lstlisting}%$ \subsection*{Hints} \noindent -\textbf{For the Main Part:} useful string functions: +\textbf{For Main Part 1:} useful string functions: \texttt{.startsWith(...)} for checking whether a string has a given prefix, \texttt{\_ ++ \_} for concatenating two strings; useful option functions: \texttt{.flatten} flattens a list of options such that it @@ -58,13 +58,13 @@ \texttt{.split(",").toList} for splitting strings according to a comma.\bigskip -\noindent +\noindent\alert \textbf{Note!} Fortunately Scala supports operator overloading. But make sure you understand the difference between \texttt{100 / 3} and \texttt{100.0 / 3}! \newpage -\subsection*{Main Part (7 Marks, file drumb.scala)} +\subsection*{Main Part 1 (7 Marks, file drumb.scala)} A purely fictional character named Mr T.~Drumb inherited in 1978 approximately 200 Million Dollar from his father. Mr Drumb prides @@ -102,7 +102,13 @@ severely rate-limited. Therefore this part comes with a number of files containing CSV-lists with the historical stock prices for the companies in our portfolios. Use these files for the following -tasks.\bigskip +tasks.\medskip + +\noindent\alert +\textbf{Note:} Do not hardcode the path to the CSV-files. The testing +framework will assume that these files are in the same directory as the +drumb.scala file. +\bigskip \newpage \noindent @@ -231,7 +237,9 @@ of companies that went bust or were de-listed over the years. So where does this leave our fictional character Mr T.~Drumb? Well, given his inheritance, a really dumb investment strategy would have done -equally well, if not much better.\medskip +equally well, if not much better. And one would assume this guy is +by now locked up in prison and the key thrown away, but alas he +is still around annoying commonsense people.\medskip \end{document} diff -r 017f621f5835 -r 3ffe978a5664 cws/main_cw02.tex --- a/cws/main_cw02.tex Thu Nov 04 12:20:12 2021 +0000 +++ b/cws/main_cw02.tex Fri Nov 05 16:47:55 2021 +0000 @@ -9,7 +9,7 @@ %% should ask to lower case the words. -\section*{Main Part 2 (Scala, 7 Marks)} +\section*{Main Part 2 (Scala, 6 Marks)} \noindent @@ -39,7 +39,7 @@ the command line with the option \texttt{-cp danube.jar} and then query any function from the template file. Say you want to find out what the function \texttt{} produces: for this you just need -to prefix it with the object name \texttt{CW7b}. If you want to find out what +to prefix it with the object name \texttt{M2}. If you want to find out what these functions produce for the list \texttt{List("a", "b", "b")}, you would type something like: @@ -48,7 +48,7 @@ scala> val ratings_url = | """https://nms.kcl.ac.uk/christian.urban/ratings.csv""" -scala> CW7b.get_csv_url(ratings_url) +scala> M2.get_csv_url(ratings_url) val res0: List[String] = List(1,1,4 ...) \end{lstlisting}%$ @@ -70,7 +70,7 @@ \newpage -\subsection*{Main Part 2 (7 Marks, file danube.scala)} +\subsection*{Main Part 2 (6 Marks, file danube.scala)} You are creating Danube.co.uk which you hope will be the next big thing in online movie renting. You know that you can save money by @@ -189,17 +189,17 @@ then return fewer than two movie names.\\ \mbox{}\hfill [1 Mark] -\item[(7)] Calculate the recommendations for all movies according to - what the recommendations function in (6) produces (this - can take a few seconds). Put all recommendations into a list - (of strings) and count how often the strings occur in - this list. This produces a list of string-int pairs, - where the first component is the movie name and the second - is the number of how many times the movie was recommended. - Sort all the pairs according to the number - of times they were recommended (most recommended movie name - first).\\ - \mbox{}\hfill [1 Mark] +%\item[(7)] Calculate the recommendations for all movies according to +% what the recommendations function in (6) produces (this +% can take a few seconds). Put all recommendations into a list +% (of strings) and count how often the strings occur in +% this list. This produces a list of string-int pairs, +% where the first component is the movie name and the second +% is the number of how many times the movie was recommended. +% Sort all the pairs according to the number +% of times they were recommended (most recommended movie name +% first).\\ +% \mbox{}\hfill [1 Mark] \end{itemize} diff -r 017f621f5835 -r 3ffe978a5664 cws/main_cw03.tex --- a/cws/main_cw03.tex Thu Nov 04 12:20:12 2021 +0000 +++ b/cws/main_cw03.tex Fri Nov 05 16:47:55 2021 +0000 @@ -119,7 +119,7 @@ % BF IDE % https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5 -\section*{Main Part 3 (Scala, 7 Marks)} +\section*{Main Part 3 (Scala, 6 Marks)} %\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\ %\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\ @@ -150,7 +150,7 @@ computer. For example you can call Scala on the command line with the option \texttt{-cp re.jar} and then query any function from the \texttt{re.scala} template file. As usual you have to prefix the calls -with \texttt{CW8c} or import this object. Since some tasks +with \texttt{M3} or import this object. Since some tasks are time sensitive, you can check the reference implementation as follows: if you want to know, for example, how long it takes to match strings of $a$'s using the regular expression $(a^*)^*\cdot b$ you can @@ -159,7 +159,7 @@ \begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small] $ scala -cp re.jar -scala> import CW8c._ +scala> import M3._ scala> for (i <- 0 to 5000000 by 500000) { | println(f"$i: ${time_needed(2, matcher(EVIL, "a" * i))}%.5f secs.") | } @@ -230,12 +230,21 @@ $\ZERO$ & $\mapsto$ & \texttt{ZERO}\\ $\ONE$ & $\mapsto$ & \texttt{ONE}\\ $c$ & $\mapsto$ & \texttt{CHAR(c)}\\ + $\sum rs$ & $\mapsto$ & \texttt{ALTs(rs)}\\ $r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)} & \texttt{r1 | r2}\\ $r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)} & \texttt{r1 $\sim$ r2}\\ $r^*$ & $\mapsto$ & \texttt{STAR(r)} & \texttt{r.\%} \end{tabular} \end{center} +\noindent +The alternative regular expressions comes in two versions: one is +binary (+ / \texttt{ALT}) and the other is n-ary ($\sum$ / +\texttt{ALTs}). The latter takes a list of regular expressions as +arguments. In what follows we shall use $rs$ to stand for lists of +regular expressions. The binary alternative can be seen as an abbreviation, +that is $r_1 + r_2 \dn \sum\,[r_1, r_2]$. As a result we can ignore the +binary version and only implement the n-ary alternative. \begin{itemize} \item[(1)] Implement a function, called \textit{nullable}, by @@ -250,11 +259,11 @@ $\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\ $\textit{nullable}(\ONE)$ & $\dn$ & $\textit{true}$\\ $\textit{nullable}(c)$ & $\dn$ & $\textit{false}$\\ -$\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\ +$\textit{nullable}(\sum rs)$ & $\dn$ & $\exists r \in rs.\;\textit{nullable}(r)$\\ $\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\ $\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\ \end{tabular} -\end{center}~\hfill[1 Mark] +\end{center}~\hfill[0.5 Marks] \item[(2)] Implement a function, called \textit{der}, by recursion over regular expressions. It takes a character and a regular expression @@ -266,7 +275,7 @@ $\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\ $\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ $\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ -$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\ +$\textit{der}\;c\;(\sum\;[r_1,..,r_n])$ & $\dn$ & $\sum\;[\textit{der}\;c\;r_1,..,\textit{der}\;c\;r_n]$\\ $\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\ & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\ @@ -312,7 +321,32 @@ Note, the last derivative can match the empty string, that is it is \textit{nullable}.\\ \mbox{}\hfill\mbox{[1 Mark]} -\item[(3)] Implement the function \textit{simp}, which recursively +\item[(3)] We next want to simplify regular expressions: essentially + we want to remove $\ZERO$ in regular expressions like $r + \ZERO$ + and $\ZERO + r$. However, our n-ary alternative takes a list of + regular expressions as argument, we therefore need a more general + ``flatten'' function, called \texttt{flts}. This function should + analyse a list of regular expressions, say $rs$, as follows: + + \begin{center} + \begin{tabular}{lllll} + 1) &$rs = []$ & $\dn$ & $[]$ & (empty list)\\ + 2) &$rs = \ZERO :: rest$ & $\dn$ & $\textrm{flatten}\;rest$\\ + 3) &$rs = (\sum rs_1) :: rest$ & $\dn$ & $rs_1 ::: \textrm{flatten}\;rest$\\ + 4) &$rs = r :: rest$ & $\dn$ & $r :: \textrm{flatten}\;rest$ & (otherwise)\\ + \end{tabular} + \end{center} + + The first clause just states that empty lists cannot be further + flattened. The second removes all $\ZERO$s from the list. + The third is when the first regular expression is an \texttt{ALTs}, then + the content of this alternative should be spilled out and appended + with the flattened rest of the list. The last case is for all other + cases where the head of the list is not $\ZERO$ and not an \texttt{ALTs}, + then we just keep the head of the list and flatten the rest. + \mbox{}\hfill\mbox{[1 Mark]} + +\item[(4)] Implement the function \textit{simp}, which recursively traverses a regular expression, and on the way up simplifies every regular expression on the left (see below) to the regular expression on the right, except it does not simplify inside ${}^*$-regular @@ -324,31 +358,35 @@ $\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ $r \cdot \ONE$ & $\mapsto$ & $r$\\ $\ONE \cdot r$ & $\mapsto$ & $r$\\ -$r + \ZERO$ & $\mapsto$ & $r$\\ -$\ZERO + r$ & $\mapsto$ & $r$\\ -$r + r$ & $\mapsto$ & $r$\\ +$\sum\;[r_1,..,r_n]$ & $\mapsto$ & $\sum\;(\texttt{(flts + distinct)}[simp(r_1),..,simp(r_n)])$\\ \end{tabular} - \end{center} +\end{center} + + The last case is as follows: first apply $simp$ to all regular expressions + $r_1,.. ,r_n$; then flatten the resulting list using \texttt{flts}; + finally remove all duplicates in this list (this can be done in Scala + using the function \texttt{\_.distinct}). For example the regular expression \[(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)\] simplifies to just $r_1$. \textbf{Hint:} Regular expressions can be seen as trees and there are several methods for traversing - trees. One of them corresponds to the inside-out traversal, which is also - sometimes called post-order tra\-versal: you traverse inside the - tree and on the way up you apply simplification rules. - \textbf{Another Hint:} - Remember numerical expressions from school times---there you had expressions - like $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ - and simplification rules that looked very similar to rules - above. You would simplify such numerical expressions by replacing - for example the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then - look whether more rules are applicable. If you organise the - simplification in an inside-out fashion, it is always clear which - simplification should be applied next.\hfill[1 Mark] + trees. One of them corresponds to the inside-out traversal, which is + also sometimes called post-order tra\-versal: you traverse inside + the tree and on the way up you apply simplification rules. + \textbf{Another Hint:} Remember numerical expressions from school + times---there you had expressions like + $u + \ldots + (1 \cdot x) - \ldots (z + (y \cdot 0)) \ldots$ and + simplification rules that looked very similar to rules above. You + would simplify such numerical expressions by replacing for example + the $y \cdot 0$ by $0$, or $1\cdot x$ by $x$, and then look whether + more rules are applicable. If you regard regular expressions as + trees and if you organise the simplification in an inside-out + fashion, it is always clear which simplification should be applied + next.\mbox{}\hfill\mbox{[1 Mark]} -\item[(4)] Implement two functions: The first, called \textit{ders}, +\item[(5)] Implement two functions: The first, called \textit{ders}, takes a list of characters and a regular expression as arguments, and builds the derivative w.r.t.~the list as follows: @@ -371,7 +409,7 @@ true for the regular expression $(a\cdot b)\cdot c$ and the string $abc$, but false if you give it the string $ab$. \hfill[1 Mark] -\item[(5)] Implement a function, called \textit{size}, by recursion +\item[(6)] Implement a function, called \textit{size}, by recursion over regular expressions. If a regular expression is seen as a tree, then \textit{size} should return the number of nodes in such a tree. Therefore this function is defined as follows: @@ -381,7 +419,7 @@ $\textit{size}(\ZERO)$ & $\dn$ & $1$\\ $\textit{size}(\ONE)$ & $\dn$ & $1$\\ $\textit{size}(c)$ & $\dn$ & $1$\\ -$\textit{size}(r_1 + r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\ +$\textit{size}(\sum\,[r_1,..,r_n]$ & $\dn$ & $1 + \textit{size}(r_1) + ... + \textit{size}(r_n)$\\ $\textit{size}(r_1 \cdot r_2)$ & $\dn$ & $1 + \textit{size}(r_1) + \textit{size}(r_2)$\\ $\textit{size}(r^*)$ & $\dn$ & $1 + \textit{size}(r)$\\ \end{tabular} @@ -391,9 +429,9 @@ expression $(a^*)^* \cdot b$ grows when taking successive derivatives according the letter $a$ without simplification and then compare it to taking the derivative, but simplify the result. The sizes -are given in \texttt{re.scala}. \hfill[1 Mark] +are given in \texttt{re.scala}. \hfill[0.5 Marks] -\item[(6)] You do not have to implement anything specific under this +\item[(7)] You do not have to implement anything specific under this task. The purpose here is that you will be marked for some ``power'' test cases. For example can your matcher decide within 30 seconds whether the regular expression $(a^*)^*\cdot b$ matches strings of the @@ -406,20 +444,22 @@ \noindent correctly to just \texttt{ONE}, where \texttt{SEQ} is nested 50 or more times?\\ - \mbox{}\hfill[2 Marks] + \mbox{}\hfill[1 Mark] \end{itemize} \subsection*{Background} -Although easily implementable in Scala, the idea behind the derivative -function might not so easy to be seen. To understand its purpose -better, assume a regular expression $r$ can match strings of the form -$c\!::\!cs$ (that means strings which start with a character $c$ and have -some rest, or tail, $cs$). If you take the derivative of $r$ with -respect to the character $c$, then you obtain a regular expression -that can match all the strings $cs$. In other words, the regular -expression $\textit{der}\;c\;r$ can match the same strings $c\!::\!cs$ -that can be matched by $r$, except that the $c$ is chopped off. +Although easily implementable in Scala (ok maybe the \texttt{simp} functions and +\texttt{ALTs} needs a bit more thinking), the idea behind the +derivative function might not so easy to be seen. To understand its +purpose better, assume a regular expression $r$ can match strings of +the form $c\!::\!cs$ (that means strings which start with a character +$c$ and have some rest, or tail, $cs$). If you take the derivative of +$r$ with respect to the character $c$, then you obtain a regular +expression that can match all the strings $cs$. In other words, the +regular expression $\textit{der}\;c\;r$ can match the same strings +$c\!::\!cs$ that can be matched by $r$, except that the $c$ is chopped +off. Assume now $r$ can match the string $abc$. If you take the derivative according to $a$ then you obtain a regular expression that can match @@ -451,12 +491,13 @@ Java\footnote{Version 8 and below; Version 9 and above does not seem to be as catastrophic, but still much worse than the regular expression matcher based on derivatives.}, JavaScript and Python with the -`evil' regular expression $(a^*)^*\cdot b$, then have a look at the +``evil'' regular expression $(a^*)^*\cdot b$, then have a look at the graphs below (you can try it out for yourself: have a look at the files \texttt{catastrophic9.java}, \texttt{catastrophic.js}, \texttt{catastrophic.py} etc on KEATS). Compare this with the matcher you -have implemented. How long can the string of $a$'s be in your matcher -and still stay within the 30 seconds time limit? +have implemented. How long can a string of $a$'s be in your matcher +and still stay within the 30 seconds time limit? It should be muuuch better +than your off-the-shelf matcher in your bog-standard language. \begin{center} \begin{tabular}{@{}cc@{}} @@ -477,7 +518,7 @@ scaled ticks=false, axis lines=left, width=6cm, - height=5.5cm, + height=5.0cm, legend entries={Python, Java 8, JavaScript, Swift, Dart}, legend pos=north west, legend cell align=left] @@ -503,7 +544,7 @@ scaled ticks=false, axis lines=left, width=6cm, - height=5.5cm, + height=5.0cm, legend entries={Java 9}, legend pos=north west] \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data}; diff -r 017f621f5835 -r 3ffe978a5664 cws/pre_cw01.pdf Binary file cws/pre_cw01.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/pre_cw01.tex --- a/cws/pre_cw01.tex Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,299 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{disclaimer} -\usepackage{../langs} - - - -\begin{document} - -\section*{Preliminary Part 1 (Scala, 3 Marks)} - -\mbox{}\hfill\textit{``The most effective debugging tool is still careful thought,}\\ -\mbox{}\hfill\textit{coupled with judiciously placed print statements.''}\smallskip\\ -\mbox{}\hfill\textit{ --- Brian W. Kernighan, in Unix for Beginners (1979)}\bigskip - - -\IMPORTANT{This part is about Scala. It is due on \cwSIX{} at 5pm and worth 3\%. -Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.} - -\noindent -Also note that the running time of each part will be restricted to a -maximum of 30 seconds on my laptop. - -\DISCLAIMER{} - -\subsection*{Reference Implementation} - -Like the C++ assignments, the Scala assignments will work like this: you -push your files to GitHub and receive (after sometimes a long delay) some -automated feedback. In the end we take a snapshot of the submitted files and -apply an automated marking script to them.\medskip - -\noindent -In addition, the Scala coursework comes with a reference implementation -in form of \texttt{jar}-files. This allows you to run any test cases on -your own computer. For example you can call Scala on the command line -with the option \texttt{-cp collatz.jar} and then query any function -from the template file. Say you want to find out what the functions -\texttt{collatz} and \texttt{collatz\_max} produce: for this you just -need to prefix them with the object name \texttt{CW6a}. If you want to -find out what these functions produce for the argument \texttt{6}, you -would type something like: - -\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] -$ scala -cp collatz.jar - -scala> CW6a.collatz(6) -... -scala> CW6a.collatz_max(6) -... -\end{lstlisting}%$ - -\subsection*{Hints} - -\noindent -\textbf{For the Preliminary Part:} useful math operators: \texttt{\%} for modulo, \texttt{\&} for bit-wise and; useful -functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt}, -\texttt{.toList} for conversions, you can use \texttt{List(...).max} for the -maximum of a list, \texttt{List(...).indexOf(...)} for the first index of -a value in a list.\bigskip - - - -\newpage -\subsection*{Preliminary Part (3 Marks, file collatz.scala)} - -This part is about function definitions and recursion. You are asked -to implement a Scala program that tests examples of the -\emph{$3n + 1$-conjecture}, also called \emph{Collatz - conjecture}.\video{https://www.youtube.com./watch?v=LqKpkdRRLZw} -This conjecture can be described as follows: Start with any positive -number $n$ greater than $0$: - -\begin{itemize} -\item If $n$ is even, divide it by $2$ to obtain $n / 2$. -\item If $n$ is odd, multiply it by $3$ and add $1$ to obtain $3n + - 1$. -\item Repeat this process and you will always end up with $1$. -\end{itemize} - -\noindent -For example if you start with, say, $6$ and $9$, you obtain the -two \emph{Collatz series} -% -\[ -\begin{array}{@{}l@{\hspace{5mm}}l@{}} -6, 3, 10, 5, 16, 8, 4, 2, 1 & \text{(= 8 steps)}\\ -9, 28, 14, 7, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1 & \text{(= 19 steps)}\\ -\end{array} -\] - -\noindent -As you can see, the numbers go up and down like a roller-coaster, but -curiously they seem to always terminate in $1$. Nobody knows why. The -conjecture is that this will \emph{always} happen for every number -greater than 0.\footnote{While it is relatively easy to test this -conjecture with particular numbers, it is an interesting open problem to -\emph{prove} that the conjecture is true for \emph{all} numbers ($> 0$). -Paul Erd\"o{}s, a famous mathematician you might have heard about, said -about this conjecture: ``Mathematics may not [yet] be ready for such -problems.'' and also offered a \$500 cash prize for its solution. -Jeffrey Lagarias, another mathematician, claimed that based only on -known information about this problem, ``this is an extraordinarily -difficult problem, completely out of reach of present day mathematics.'' -There is also a \href{https://xkcd.com/710/}{xkcd} cartoon about this -conjecture\here{https://xkcd.com/710/}). If you are able to solve this -conjecture, you will definitely get famous.}\bigskip - -\noindent -\textbf{Tasks} - -\begin{itemize} -\item[(1)] You are asked to implement a recursive function that - calculates the number of steps needed until a series ends - with $1$. In case of starting with $6$, it takes $8$ steps and in - case of starting with $9$, it takes $19$ (see above). We assume it - takes $0$ steps, if we start with $1$. In order to - try out this function with large numbers, you should use - \texttt{Long} as argument type, instead of \texttt{Int}. You can - assume this function will be called with numbers between $1$ and - $1$ Million. \hfill[1 Mark] - -\item[(2)] Write a second function that takes an upper bound as - an argument and calculates the steps for all numbers in the range from - 1 up to this bound (the bound including). It returns the maximum number of - steps and the corresponding number that needs that many steps. More - precisely it returns a pair where the first component is the number - of steps and the second is the corresponding number. \hfill\mbox{[1 - Mark]} - -\item[(3)] Write a function that calculates \emph{hard - numbers} \here{https://medium.com/cantors-paradise/the-collatz-conjecture-some-shocking-results-from-180-000-iterations-7fea130d0377} - in the Collatz series---these are the last odd numbers just before a - power of two is reached. For this, implement an - \textit{is-power-of-two} function which tests whether a number is a - power of two. The easiest way to implement this is by using the - bit-operator $\&$ of Scala. For a power of two, say $n$ with $n > 0$, it - holds that $n \;\&\; (n - 1)$ is equal to zero. I let you think why - this is the case. - - The function \textit{is-hard} calculates whether - $3n + 1$ is a power of two. Finally the \textit{last-odd} function - calculates the last odd number before a power of 2 in the Collatz - series. This means for example when starting with 9, - we receive 5 as the last odd number. Surprisingly a lot of numbers - have 5 as last-odd number. But for example for 113 we obtain 85, - because of the series - % - \[113, 340, 170, \,\fbox{85}\,, 256, 128, 64, 32, 16, 8, 4, 2, 1\] - - The \textit{last-odd} function will only be called with numbers that are not - powers of 2 themselves. -\end{itemize} - -\noindent -\textbf{Test Data:} Some test ranges and cases are: - -\begin{itemize} -\item 1 to 10 where $9$ takes 19 steps -\item 1 to 100 where $97$ takes 118 steps, -\item 1 to 1,000 where $871$ takes 178 steps, -\item 1 to 10,000 where $6,171$ takes 261 steps, -\item 1 to 100,000 where $77,031$ takes 350 steps, -\item 1 to 1 Million where $837,799$ takes 524 steps - %% runs out of stack space - %% \item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 685 steps -\item 21 is the last odd number for 84 -\item 341 is the last odd number for 201, 604, 605 and 8600 - -\end{itemize} - - - - - - -\end{document} - - -%%%%%%% Historical Stuff -\newpage - -This part is about web-scraping and list-processing in Scala. It uses -online data about the per-capita alcohol consumption for each country -(per year?), and a file containing the data about the population size of -each country. From this data you are supposed to estimate how many -litres of pure alcohol are consumed worldwide.\bigskip - -\noindent -\textbf{Tasks (file alcohol.scala):} - -\begin{itemize} -\item[(1)] Write a function that given an URL requests a - comma-separated value (CSV) list. We are interested in the list - from the following URL - -\begin{center} - \url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv} -\end{center} - -\noindent Your function should take a string (the URL) as input, and -produce a list of strings as output, where each string is one line in -the corresponding CSV-list. This list from the URL above should -contain 194 lines.\medskip - -\noindent -Write another function that can read the file \texttt{population.csv} -from disk (the file is distributed with the assignment). This -function should take a string as argument, the file name, and again -return a list of strings corresponding to each entry in the -CSV-list. For \texttt{population.csv}, this list should contain 216 -lines.\hfill[1 Mark] - - -\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we - need to extract the data that interests us. From the header of the - alcohol list, you can see there are 5 columns - - \begin{center} - \begin{tabular}{l} - \texttt{country (name),}\\ - \texttt{beer\_servings,}\\ - \texttt{spirit\_servings,}\\ - \texttt{wine\_servings,}\\ - \texttt{total\_litres\_of\_pure\_alcohol} - \end{tabular} - \end{center} - - \noindent - Write a function that extracts the data from the first column, - the country name, and the data from the fifth column (converted into - a \texttt{Double}). For this go through each line of the CSV-list - (except the first line), use the \texttt{split(",")} function to - divide each line into an array of 5 elements. Keep the data from the - first and fifth element in these arrays.\medskip - - \noindent - Write another function that processes the population size list. This - is already of the form country name and population size.\footnote{Your - friendly lecturer already did the messy processing for you from the - Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the - strings according to the commas. However, this time generate a - \texttt{Map} from country names to population sizes.\hfill[1 Mark] - -\item[(3)] In (2) you generated the data about the alcohol consumption - per-capita for each country, and also the population size for each - country. From this generate next a sorted(!) list of the overall - alcohol consumption for each country. The list should be sorted from - highest alcohol consumption to lowest. The difficulty is that the - data is scraped off from ``random'' sources on the Internet and - annoyingly the spelling of some country names does not always agree in both - lists. For example the alcohol list contains - \texttt{Bosnia-Herzegovina}, while the population writes this country as - \texttt{Bosnia and Herzegovina}. In your sorted - overall list include only countries from the alcohol list, whose - exact country name is also in the population size list. This means - you can ignore countries like Bosnia-Herzegovina from the overall - alcohol consumption. There are 177 countries where the names - agree. The UK is ranked 10th on this list by - consuming 671,976,864 Litres of pure alcohol each year.\medskip - - \noindent - Finally, write another function that takes an integer, say - \texttt{n}, as argument. You can assume this integer is between 0 - and 177 (the number of countries in the sorted list above). The - function should return a triple, where the first component is the - sum of the alcohol consumption in all countries (on the list); the - second component is the sum of the \texttt{n}-highest alcohol - consumers on the list; and the third component is the percentage the - \texttt{n}-highest alcohol consumers drink with respect to the - the world consumption. You will see that according to our data, 164 - countries (out of 177) gobble up 100\% of the World alcohol - consumption.\hfill\mbox{[1 Mark]} -\end{itemize} - -\noindent -\textbf{Hints:} useful list functions: \texttt{.drop(n)}, -\texttt{.take(n)} for dropping or taking some elements in a list, -\texttt{.getLines} for separating lines in a string; -\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second -elements in the pairs---the sorting is done from smallest to highest; -useful \texttt{Map} functions: \texttt{.toMap} converts a list of -pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the -map is defined at that key, that is would produce a result when -called with this key; useful data functions: \texttt{Source.fromURL}, -\texttt{Source.fromFile} for obtaining a webpage and reading a file. - -\newpage - - - - - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 017f621f5835 -r 3ffe978a5664 cws/pre_cw02.tex --- a/cws/pre_cw02.tex Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,161 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{disclaimer} -\usepackage{../langs} - -\begin{document} - - -%% should ask to lower case the words. - -\section*{Preliminary Part 2 (Scala, 3 Marks)} - -\mbox{}\hfill\textit{``What one programmer can do in one month,}\\ -\mbox{}\hfill\textit{two programmers can do in two months.''}\smallskip\\ -\mbox{}\hfill\textit{ --- Frederick P.~Brooks (author of The Mythical Man-Month)}\bigskip\medskip - -\IMPORTANT{You are asked to implement a Scala program for measuring similarity in - texts. The preliminary part is due on \cwSEVEN{} at 5pm and worth 3\%. - Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.} - -\noindent -Also note that the running time of each part will be restricted to a -maximum of 30 seconds on my laptop. - -\DISCLAIMER{} - - -\subsection*{Reference Implementation} - -Like the C++ part, the Scala part works like this: you -push your files to GitHub and receive (after sometimes a long delay) some -automated feedback. In the end we will take a snapshot of the submitted files and -apply an automated marking script to them.\medskip - -\noindent -In addition, the Scala part comes with reference -implementations in form of \texttt{jar}-files. This allows you to run -any test cases on your own computer. For example you can call Scala on -the command line with the option \texttt{-cp docdiff.jar} and then -query any function from the template file. Say you want to find out -what the function \texttt{occurrences} produces: for this you just need -to prefix it with the object name \texttt{CW7a}. If you want to find out what -these functions produce for the list \texttt{List("a", "b", "b")}, -you would type something like: - -\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small] -$ scala -cp docdiff.jar - -scala> CW7a.occurrences(List("a", "b", "b")) -... -\end{lstlisting}%$ - -\subsection*{Hints} - -\noindent -\textbf{For the Preliminary Part:} useful operations involving regular -expressions: -\[\texttt{reg.findAllIn(s).toList}\] -\noindent finds all -substrings in \texttt{s} according to a regular regular expression -\texttt{reg}; useful list operations: \texttt{.distinct} -removing duplicates from a list, \texttt{.count} counts the number of -elements in a list that satisfy some condition, \texttt{.toMap} -transfers a list of pairs into a Map, \texttt{.sum} adds up a list of -integers, \texttt{.max} calculates the maximum of a list.\bigskip - - - -\newpage -\subsection*{Preliminary Part (3 Marks, file docdiff.scala)} - -It seems source code plagiarism---stealing and submitting someone -else's code---is a serious problem at other -universities.\footnote{Surely, King's students, after all their - instructions and warnings, would never commit such an offence. Yes?} -Detecting such plagiarism is time-consuming and disheartening for -lecturers at those universities. To aid these poor souls, let's -implement in this part a program that determines the similarity -between two documents (be they source code or texts in English). A -document will be represented as a list of strings. - - -\subsection*{Tasks} - -\begin{itemize} -\item[(1)] Implement a function that `cleans' a string by finding all - (proper) words in this string. For this use the regular expression - \texttt{\textbackslash{}w+} for recognising words and the library function - \texttt{findAllIn}. The function should return a document (a list of - strings).\\ - \mbox{}\hfill [0.5 Marks] - -\item[(2)] In order to compute the overlap between two documents, we - associate each document with a \texttt{Map}. This Map represents the - strings in a document and how many times these strings occur in the - document. A simple (though slightly inefficient) method for counting - the number of string-occurrences in a document is as follows: remove - all duplicates from the document; for each of these (unique) - strings, count how many times they occur in the original document. - Return a Map associating strings with occurrences. For example - - \begin{center} - \pcode{occurrences(List("a", "b", "b", "c", "d"))} - \end{center} - - produces \pcode{Map(a -> 1, b -> 2, c -> 1, d -> 1)} and - - \begin{center} - \pcode{occurrences(List("d", "b", "d", "b", "d"))} - \end{center} - - produces \pcode{Map(d -> 3, b -> 2)}.\hfill[1 Mark] - -\item[(3)] You can think of the Maps calculated under (2) as memory-efficient - representations of sparse ``vectors''. In this subtask you need to - implement the \emph{product} of two such vectors, sometimes also called - \emph{dot product} of two vectors.\footnote{\url{https://en.wikipedia.org/wiki/Dot_product}} - - For this dot product, implement a function that takes two documents - (\texttt{List[String]}) as arguments. The function first calculates - the (unique) strings in both. For each string, it multiplies the - corresponding occurrences in each document. If a string does not - occur in one of the documents, then the product for this string is zero. At the end - you need to add up all products. For the two documents in (2) the dot - product is 7, because - - \[ - \underbrace{1 * 0}_{"a"} \;\;+\;\; - \underbrace{2 * 2}_{"b"} \;\;+\;\; - \underbrace{1 * 0}_{"c"} \;\;+\;\; - \underbrace{1 * 3}_{"d"} \qquad = 7 - \] - - \hfill\mbox{[1 Mark]} - -\item[(4)] Implement first a function that calculates the overlap - between two documents, say $d_1$ and $d_2$, according to the formula - - \[ - \texttt{overlap}(d_1, d_2) = \frac{d_1 \cdot d_2}{max(d_1^2, d_2^2)} - \] - - where $d_1^2$ means $d_1 \cdot d_1$ and so on. - You can expect this function to return a \texttt{Double} between 0 and 1. The - overlap between the lists in (2) is $0.5384615384615384$. - - Second, implement a function that calculates the similarity of - two strings, by first extracting the substrings using the clean - function from (1) - and then calculating the overlap of the resulting documents.\\ - \mbox{}\hfill\mbox{[0.5 Marks]} -\end{itemize} - - -\end{document} - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 017f621f5835 -r 3ffe978a5664 cws/pre_cw03.pdf Binary file cws/pre_cw03.pdf has changed diff -r 017f621f5835 -r 3ffe978a5664 cws/pre_cw03.tex --- a/cws/pre_cw03.tex Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,181 +0,0 @@ -% !TEX program = xelatex -\documentclass{article} -\usepackage{../style} -\usepackage{../langs} -\usepackage{disclaimer} -\usepackage{tikz} -\usepackage{pgf} -\usepackage{pgfplots} -\usepackage{stackengine} -%% \usepackage{accents} -\newcommand\barbelow[1]{\stackunder[1.2pt]{#1}{\raisebox{-4mm}{\boldmath$\uparrow$}}} -\usepackage{ulem} - -\begin{document} - -% BF IDE -% https://www.microsoft.com/en-us/p/brainf-ck/9nblgggzhvq5 - -\section*{Preliminary Part 3 (Scala, 3 Marks)} - -\mbox{}\hfill\textit{``[Google’s MapReduce] abstraction is inspired by the}\\ -\mbox{}\hfill\textit{map and reduce primitives present in Lisp and many}\\ -\mbox{}\hfill\textit{other functional languages.''}\smallskip\\ -\mbox{}\hfill\textit{ --- Dean and Ghemawat, who designed this concept at Google} -\bigskip - -\IMPORTANT{This part is about the shunting yard algorithm by Dijkstra. - The preliminary part is due on \sout{\cwEIGHT{}} \textcolor{red}{11 December} - at 5pm and worth 3\%. - Any 1\% you achieve in the preliminary part counts as your - ``weekly engagement''.} - -\noindent -Also note that the running time of each part will be restricted to a -maximum of 30 seconds on my laptop. - -\DISCLAIMER{} - -\subsection*{Reference Implementation} - -This Scala assignment comes with two reference implementations in -form of \texttt{jar}-files. This allows -you to run any test cases on your own computer. For example you can -call Scala on the command line with the option \texttt{-cp - postfix.jar} and then query any function from the -\texttt{postfix.scala} file (similarly for file \texttt{postfix2.scala}). As -usual you have to prefix the calls with \texttt{CW8a} and -\texttt{CW8b}, respectively. - - -\begin{lstlisting}[xleftmargin=1mm,numbers=none,basicstyle=\ttfamily\small] -$ scala -cp postfix.jar - -scala> CW8a.syard(CW8a.split("( 5 + 7 ) * 2")) -val res0: CW8a.Toks = List(5, 7, +, 2, *) -\end{lstlisting}%$ - - -\subsection*{Hints} - -\noindent -\textbf{For the Preliminary Part:} useful operations for determining -whether a string is a number are \texttt{.forall} and \texttt{.isDigit}. -One way to calculate the the power operation is to use \texttt{.pow} -on \texttt{BigInt}s, like \texttt{BigInt(n).pow(m).toInt}. -\bigskip - - -\subsection*{Preliminary Part (3 Marks, files postfix.scala, postfix2.scala)} - -The \emph{Shunting Yard Algorithm} has been developed by Edsger Dijkstra, -an influential computer scientist who developed many well-known -algorithms. This algorithm transforms the usual infix notation of -arithmetic expressions into the postfix notation, sometimes also -called reverse Polish notation. - -Why on Earth do people use the postfix notation? It is much more -convenient to work with the usual infix notation for arithmetic -expressions. Most modern pocket calculators (as opposed to the ones used 20 -years ago) understand infix notation. So why on Earth? \ldots{}Well, -many computers under the hood, even nowadays, use postfix notation -extensively. For example if you give to the Java compiler the -expression $1 + ((2 * 3) + (4 - 3))$, it will generate the Java Byte -code - -\begin{lstlisting}[language=JVMIS,numbers=none] -ldc 1 -ldc 2 -ldc 3 -imul -ldc 4 -ldc 3 -isub -iadd -iadd -\end{lstlisting} - -\noindent -where the command \texttt{ldc} loads a constant onto the stack, and \texttt{imul}, -\texttt{isub} and \texttt{iadd} are commands acting on the stack. Clearly this -is the arithmetic expression in postfix notation.\bigskip - -\noindent -The shunting yard algorithm processes an input token list using an -operator stack and an output list. The input consists of numbers, -operators ($+$, $-$, $*$, $/$) and parentheses, and for the purpose of -the assignment we assume the input is always a well-formed expression -in infix notation. The calculation in the shunting yard algorithm uses -information about the -precedences of the operators (given in the template file). The -algorithm processes the input token list as follows: - -\begin{itemize} -\item If there is a number as input token, then this token is - transferred directly to the output list. Then the rest of the input is - processed. -\item If there is an operator as input token, then you need to check - what is on top of the operator stack. If there are operators with - a higher or equal precedence, these operators are first popped off - from the stack and moved to the output list. Then the operator from the input - is pushed onto the stack and the rest of the input is processed. -\item If the input is a left-parenthesis, you push it on to the stack - and continue processing the input. -\item If the input is a right-parenthesis, then you pop off all operators - from the stack to the output list until you reach the left-parenthesis. - Then you discharge the $($ and $)$ from the input and stack, and continue - processing the input list. -\item If the input is empty, then you move all remaining operators - from the stack to the output list. -\end{itemize} - -\subsubsection*{Tasks (file postfix.scala)} - -\begin{itemize} -\item[(1)] Implement the shunting yard algorithm described above. The - function, called \texttt{syard}, takes a list of tokens as first - argument. The second and third arguments are the stack and output - list represented as Scala lists. The most convenient way to - implement this algorithm is to analyse what the input list, stack - and output list look like in each step using pattern-matching. The - algorithm transforms for example the input - - \[ - \texttt{List(3, +, 4, *, (, 2, -, 1, ))} - \] - - into the postfix output - - \[ - \texttt{List(3, 4, 2, 1, -, *, +)} - \] - - You can assume the input list is always a list representing - a well-formed infix arithmetic expression.\hfill[1 Mark] - -\item[(2)] Implement a compute function that takes a postfix expression - as argument and evaluates it generating an integer as result. It uses a - stack to evaluate the postfix expression. The operators $+$, $-$, $*$ - are as usual; $/$ is division on integers, for example $7 / 3 = 2$. - \hfill[1 Mark] -\end{itemize} - -\subsubsection*{Task (file postfix2.scala)} - -\begin{itemize} -\item[(3/4)] Extend the code in (1) and (2) to include the power - operator. This requires proper account of associativity of - the operators. The power operator is right-associative, whereas the - other operators are left-associative. Left-associative operators - are popped off if the precedence is bigger or equal, while - right-associative operators are only popped off if the precedence is - bigger.\hfill[1 Marks] -\end{itemize} - -\end{document} - - -%%% Local Variables: -%%% mode: latex -%%% TeX-master: t -%%% End: diff -r 017f621f5835 -r 3ffe978a5664 main_solution1/drumb.scala --- a/main_solution1/drumb.scala Thu Nov 04 12:20:12 2021 +0000 +++ b/main_solution1/drumb.scala Fri Nov 05 16:47:55 2021 +0000 @@ -24,7 +24,7 @@ // strings for each line in the CSV-file. def get_january_data(symbol: String, year: Int) : List[String] = - Source.fromFile(symbol ++ ".csv")("ISO-8859-1").getLines.toList.filter(_.startsWith(year.toString)) + Source.fromFile(symbol ++ ".csv")("ISO-8859-1").getLines().toList.filter(_.startsWith(year.toString)) //test cases @@ -169,10 +169,32 @@ //test cases for the two portfolios given above -//println("Real data: " + investment(rstate_portfolio, 1978 to 2019, 100)) -//println("Blue data: " + investment(blchip_portfolio, 1978 to 2019, 100)) + println("Real data: " + investment(rstate_portfolio, 1978 to 2019, 100)) + println("Blue data: " + investment(blchip_portfolio, 1978 to 2019, 100)) + } + + + + + + + + + + + + + + + + + + + +import CW6b._ +investment(rstate_portfolio, 1978 to 2019, 100) diff -r 017f621f5835 -r 3ffe978a5664 main_solution3/re.scala --- a/main_solution3/re.scala Thu Nov 04 12:20:12 2021 +0000 +++ b/main_solution3/re.scala Fri Nov 05 16:47:55 2021 +0000 @@ -1,19 +1,24 @@ // Core Part about Regular Expression Matching //============================================= -object CW8c { +object M3 { // Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp -case class SEQ(r1: Rexp, r2: Rexp) extends Rexp -case class STAR(r: Rexp) extends Rexp +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence +case class STAR(r: Rexp) extends Rexp // star // some convenience for typing in regular expressions + +//the usual binary choice can be defined in terms of ALTs +def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) + + import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -49,7 +54,7 @@ case ZERO => false case ONE => true case CHAR(_) => false - case ALT(r1, r2) => nullable(r1) || nullable(r2) + case ALTs(rs) => rs.exists(nullable) case SEQ(r1, r2) => nullable(r1) && nullable(r2) case STAR(_) => true } @@ -63,13 +68,22 @@ case ZERO => ZERO case ONE => ZERO case CHAR(d) => if (c == d) ONE else ZERO - case ALT(r1, r2) => ALT(der(c, r1), der(c, r2)) + case ALTs(rs) => ALTs(rs.map(der(c, _))) case SEQ(r1, r2) => if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2)) else SEQ(der(c, r1), r2) case STAR(r1) => SEQ(der(c, r1), STAR(r1)) } + + +def flts(rs: List[Rexp]) : List[Rexp] = rs match { + case Nil => Nil + case ZERO::tl => flts(tl) + case ALTs(rs1)::rs2 => rs1 ::: flts(rs2) + case r::rs => r :: flts(rs) +} + // (3) Complete the simp function according to // the specification given in the coursework; this // function simplifies a regular expression from @@ -77,11 +91,12 @@ // expressions; however it does not simplify inside // STAR-regular expressions. + def simp(r: Rexp) : Rexp = r match { - case ALT(r1, r2) => (simp(r1), simp(r2)) match { - case (ZERO, r2s) => r2s - case (r1s, ZERO) => r1s - case (r1s, r2s) => if (r1s == r2s) r1s else ALT (r1s, r2s) + case ALTs(rs) => (flts(rs.map(simp)).distinct) match { + case Nil => ZERO + case r::Nil => r + case rs => ALTs(rs) } case SEQ(r1, r2) => (simp(r1), simp(r2)) match { case (ZERO, _) => ZERO @@ -117,7 +132,7 @@ case ZERO => 1 case ONE => 1 case CHAR(_) => 1 - case ALT(r1, r2) => 1 + size(r1) + size (r2) + case ALTs(rs) => 1 + rs.map(size).sum case SEQ(r1, r2) => 1 + size(r1) + size (r2) case STAR(r1) => 1 + size(r1) } @@ -132,16 +147,19 @@ // the supposedly 'evil' regular expression (a*)* b val EVIL = SEQ(STAR(STAR(CHAR('a'))), CHAR('b')) -//matcher(EVIL, "a" * 1000 ++ "b") // => true -//matcher(EVIL, "a" * 1000) // => false +//println(matcher(EVIL, "a" * 1000 ++ "b")) // => true +//println(matcher(EVIL, "a" * 1000)) // => false // size without simplifications -//size(der('a', der('a', EVIL))) // => 28 -//size(der('a', der('a', der('a', EVIL)))) // => 58 +//println(size(der('a', der('a', EVIL)))) // => 28 +//println(size(der('a', der('a', der('a', EVIL))))) // => 58 // size with simplification -//size(simp(der('a', der('a', EVIL)))) // => 8 -//size(simp(der('a', der('a', der('a', EVIL))))) // => 8 +//println(simp(der('a', der('a', EVIL)))) // => 8 +//println(simp(der('a', der('a', der('a', EVIL)))))// => 8 + +//println(size(simp(der('a', der('a', EVIL))))) // => 8 +//println(size(simp(der('a', der('a', der('a', EVIL)))))) // => 8 // Python needs around 30 seconds for matching 28 a's with EVIL. // Java 9 and later increase this to an "astonishing" 40000 a's in diff -r 017f621f5835 -r 3ffe978a5664 main_templates1/drumb.jar Binary file main_templates1/drumb.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 main_templates1/drumb.scala --- a/main_templates1/drumb.scala Thu Nov 04 12:20:12 2021 +0000 +++ b/main_templates1/drumb.scala Fri Nov 05 16:47:55 2021 +0000 @@ -1,7 +1,7 @@ -// Core Part about a really dumb investment strategy +// Main Part 1 about a really dumb investment strategy //=================================================== -object CW6b { +object M1 { //two test portfolios diff -r 017f621f5835 -r 3ffe978a5664 main_templates2/danube.jar Binary file main_templates2/danube.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 main_templates2/danube.scala --- a/main_templates2/danube.scala Thu Nov 04 12:20:12 2021 +0000 +++ b/main_templates2/danube.scala Fri Nov 05 16:47:55 2021 +0000 @@ -1,8 +1,8 @@ -// Core Part about Movie Recommendations +// Main Part 2 about Movie Recommendations // at Danube.co.uk //=========================================== -object CW7b { +object M2 { import io.Source import scala.util._ @@ -158,29 +158,4 @@ -// (7) Calculate the recommendations for all movies according to -// what the recommendations function in (6) produces (this -// can take a few seconds). Put all recommendations into a list -// (of strings) and count how often the strings occur in -// this list. This produces a list of string-int pairs, -// where the first component is the movie name and the second -// is the number of how many times the movie was recommended. -// Sort all the pairs according to the number -// of times they were recommended (most recommended movie name -// first). - -def most_recommended(recs: Map[String, List[String]], - movs: Map[String, String]) : List[(String, Int)] = ??? - - -// testcase -// -//most_recommended(ratings_map, movies_map).take(3) -// => -// List((Matrix,698), -// (Star Wars: Episode IV - A New Hope (1977),402), -// (Jerry Maguire (1996),382)) - - - } diff -r 017f621f5835 -r 3ffe978a5664 main_templates3/re.jar Binary file main_templates3/re.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 main_templates3/re.scala --- a/main_templates3/re.scala Thu Nov 04 12:20:12 2021 +0000 +++ b/main_templates3/re.scala Fri Nov 05 16:47:55 2021 +0000 @@ -1,20 +1,24 @@ -// Core Part about Regular Expression Matching +// Main Part 3 about Regular Expression Matching //============================================= -object CW8c { +object M3 { // Regular Expressions abstract class Rexp case object ZERO extends Rexp case object ONE extends Rexp case class CHAR(c: Char) extends Rexp -case class ALT(r1: Rexp, r2: Rexp) extends Rexp // alternative +case class ALTs(rs: List[Rexp]) extends Rexp // alternatives case class SEQ(r1: Rexp, r2: Rexp) extends Rexp // sequence case class STAR(r: Rexp) extends Rexp // star // some convenience for typing regular expressions +//the usual binary choice can be defined in terms of ALTs +def ALT(r1: Rexp, r2: Rexp) = ALTs(List(r1, r2)) + + import scala.language.implicitConversions import scala.language.reflectiveCalls @@ -56,17 +60,27 @@ def der (c: Char, r: Rexp) : Rexp = ??? -// (3) Complete the simp function according to -// the specification given in the coursework; this -// function simplifies a regular expression from +// (3) Implement the flatten function flts. It +// deletes 0s from a list of regular expressions +// and also 'spills out', or flattens, nested +// ALTernativeS. + +def flts(rs: List[Rexp]) : List[Rexp] = ??? + + + +// (4) Complete the simp function according to +// the specification given in the coursework description; +// this function simplifies a regular expression from // the inside out, like you would simplify arithmetic // expressions; however it does not simplify inside -// STAR-regular expressions. +// STAR-regular expressions. Use the _.distinct and +// flts functions. def simp(r: Rexp) : Rexp = ??? -// (4) Complete the two functions below; the first +// (5) Complete the two functions below; the first // calculates the derivative w.r.t. a string; the second // is the regular expression matcher taking a regular // expression and a string and checks whether the @@ -77,7 +91,7 @@ def matcher(r: Rexp, s: String): Boolean = ??? -// (5) Complete the size function for regular +// (6) Complete the size function for regular // expressions according to the specification // given in the coursework. diff -r 017f621f5835 -r 3ffe978a5664 pre_templates1/collatz.jar Binary file pre_templates1/collatz.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 pre_templates1/collatz.scala --- a/pre_templates1/collatz.scala Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -// Preliminary Part about the 3n+1 conjecture -//============================================ - -object CW6a { - -//(1) Complete the collatz function below. It should -// recursively calculate the number of steps needed -// until the collatz series reaches the number 1. -// If needed, you can use an auxiliary function that -// performs the recursion. The function should expect -// arguments in the range of 1 to 1 Million. - -def collatz(n: Long) : Long = ??? - - -//(2) Complete the collatz_max function below. It should -// calculate how many steps are needed for each number -// from 1 up to a bound and then calculate the maximum number of -// steps and the corresponding number that needs that many -// steps. Again, you should expect bounds in the range of 1 -// up to 1 Million. The first component of the pair is -// the maximum number of steps and the second is the -// corresponding number. - -def collatz_max(bnd: Long) : (Long, Long) = ??? - -//(3) Implement a function that calculates the last_odd -// number in a collatz series. For this implement an -// is_pow_of_two function which tests whether a number -// is a power of two. The function is_hard calculates -// whether 3n + 1 is a power of two. Again you can -// assume the input ranges between 1 and 1 Million, -// and also assume that the input of last_odd will not -// be a power of 2. - -def is_pow_of_two(n: Long) : Boolean = ??? - -def is_hard(n: Long) : Boolean = ??? - -def last_odd(n: Long) : Long = ??? - -} - diff -r 017f621f5835 -r 3ffe978a5664 pre_templates2/docdiff.jar Binary file pre_templates2/docdiff.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 pre_templates2/docdiff.scala --- a/pre_templates2/docdiff.scala Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -// Preliminary Part about Code Similarity -//======================================== - - -object CW7a { - - -//(1) Complete the clean function below. It should find -// all words in a string using the regular expression -// \w+ and the library function -// -// some_regex.findAllIn(some_string) -// -// The words should be Returned as a list of strings. - - -def clean(s: String) : List[String] = ??? - - - -//(2) The function occurrences calculates the number of times -// strings occur in a list of strings. These occurrences should -// be calculated as a Map from strings to integers. - - -def occurrences(xs: List[String]): Map[String, Int] = ??? - - -//(3) This functions calculates the dot-product of two documents -// (list of strings). For this it calculates the occurrence -// maps from (2) and then multiplies the corresponding occurrences. -// If a string does not occur in a document, the product is zero. -// The function finally sums up all products. - - -def prod(lst1: List[String], lst2: List[String]) : Int = ??? - - -//(4) Complete the functions overlap and similarity. The overlap of -// two documents is calculated by the formula given in the assignment -// description. The similarity of two strings is given by the overlap -// of the cleaned strings (see (1)). - - -def overlap(lst1: List[String], lst2: List[String]) : Double = ??? - -def similarity(s1: String, s2: String) : Double = ??? - - - -/* Test cases - - -val list1 = List("a", "b", "b", "c", "d") -val list2 = List("d", "b", "d", "b", "d") - -occurrences(List("a", "b", "b", "c", "d")) // Map(a -> 1, b -> 2, c -> 1, d -> 1) -occurrences(List("d", "b", "d", "b", "d")) // Map(d -> 3, b -> 2) - -prod(list1,list2) // 7 - -overlap(list1, list2) // 0.5384615384615384 -overlap(list2, list1) // 0.5384615384615384 -overlap(list1, list1) // 1.0 -overlap(list2, list2) // 1.0 - -// Plagiarism examples from -// https://desales.libguides.com/avoidingplagiarism/examples - -val orig1 = """There is a strong market demand for eco-tourism in -Australia. Its rich and diverse natural heritage ensures Australia's -capacity to attract international ecotourists and gives Australia a -comparative advantage in the highly competitive tourism industry.""" - -val plag1 = """There is a high market demand for eco-tourism in -Australia. Australia has a comparative advantage in the highly -competitive tourism industry due to its rich and varied natural -heritage which ensures Australia's capacity to attract international -ecotourists.""" - -similarity(orig1, plag1) // 0.8679245283018868 - - -// Plagiarism examples from -// https://www.utc.edu/library/help/tutorials/plagiarism/examples-of-plagiarism.php - -val orig2 = """No oil spill is entirely benign. Depending on timing and -location, even a relatively minor spill can cause significant harm to -individual organisms and entire populations. Oil spills can cause -impacts over a range of time scales, from days to years, or even -decades for certain spills. Impacts are typically divided into acute -(short-term) and chronic (long-term) effects. Both types are part of a -complicated and often controversial equation that is addressed after -an oil spill: ecosystem recovery.""" - -val plag2 = """There is no such thing as a "good" oil spill. If the -time and place are just right, even a small oil spill can cause damage -to sensitive ecosystems. Further, spills can cause harm days, months, -years, or even decades after they occur. Because of this, spills are -usually broken into short-term (acute) and long-term (chronic) -effects. Both of these types of harm must be addressed in ecosystem -recovery: a controversial tactic that is often implemented immediately -following an oil spill.""" - -overlap(clean(orig2), clean(plag2)) // 0.728 -similarity(orig2, plag2) // 0.728 - - - -// The punchline: everything above 0.6 looks suspicious and -// should be investigated by staff. - -*/ - -} diff -r 017f621f5835 -r 3ffe978a5664 pre_templates3/postfix.jar Binary file pre_templates3/postfix.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 pre_templates3/postfix.scala --- a/pre_templates3/postfix.scala Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ -// Shunting Yard Algorithm -// by Edsger Dijkstra -// ======================== - -object CW8a { - -// type of tokens -type Toks = List[String] - -// the operations in the basic version of the algorithm -val ops = List("+", "-", "*", "/") - -// the precedences of the operators -val precs = Map("+" -> 1, - "-" -> 1, - "*" -> 2, - "/" -> 2) - -// helper function for splitting strings into tokens -def split(s: String) : Toks = s.split(" ").toList - - -// (1) Implement below the shunting yard algorithm. The most -// convenient way to this in Scala is to implement a recursive -// function and to heavily use pattern matching. The function syard -// takes some input tokens as first argument. The second and third -// arguments represent the stack and the output of the shunting yard -// algorithm. -// -// In the marking, you can assume the function is called only with -// an empty stack and an empty output list. You can also assume the -// input os only properly formatted (infix) arithmetic expressions -// (all parentheses will be well-nested, the input only contains -// operators and numbers). - -// You can implement any additional helper function you need. I found -// it helpful to implement two auxiliary functions for the pattern matching: -// - -def is_op(op: String) : Boolean = ??? -def prec(op1: String, op2: String) : Boolean = ??? - - -def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? - - -// test cases -//syard(split("3 + 4 * ( 2 - 1 )")) // 3 4 2 1 - * + -//syard(split("10 + 12 * 33")) // 10 12 33 * + -//syard(split("( 5 + 7 ) * 2")) // 5 7 + 2 * -//syard(split("5 + 7 / 2")) // 5 7 2 / + -//syard(split("5 * 7 / 2")) // 5 7 * 2 / -//syard(split("9 + 24 / ( 7 - 3 )")) // 9 24 7 3 - / + - -//syard(split("3 + 4 + 5")) // 3 4 + 5 + -//syard(split("( ( 3 + 4 ) + 5 )")) // 3 4 + 5 + -//syard(split("( 3 + ( 4 + 5 ) )")) // 3 4 5 + + -//syard(split("( ( ( 3 ) ) + ( ( 4 + ( 5 ) ) ) )")) // 3 4 5 + + - - -// (2) Implement a compute function that evaluates an input list -// in postfix notation. This function takes a list of tokens -// and a stack as argumenta. The function should produce the -// result as an integer using the stack. You can assume -// this function will be only called with proper postfix -// expressions. - -def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? - - -// test cases -// compute(syard(split("3 + 4 * ( 2 - 1 )"))) // 7 -// compute(syard(split("10 + 12 * 33"))) // 406 -// compute(syard(split("( 5 + 7 ) * 2"))) // 24 -// compute(syard(split("5 + 7 / 2"))) // 8 -// compute(syard(split("5 * 7 / 2"))) // 17 -// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15 - -} - - diff -r 017f621f5835 -r 3ffe978a5664 pre_templates3/postfix2.jar Binary file pre_templates3/postfix2.jar has changed diff -r 017f621f5835 -r 3ffe978a5664 pre_templates3/postfix2.scala --- a/pre_templates3/postfix2.scala Thu Nov 04 12:20:12 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -// Shunting Yard Algorithm -// including Associativity for Operators -// ===================================== - -object CW8b { - - -// type of tokens -type Toks = List[String] - -// helper function for splitting strings into tokens -def split(s: String) : Toks = s.split(" ").toList - -// left- and right-associativity -abstract class Assoc -case object LA extends Assoc -case object RA extends Assoc - - -// power is right-associative, -// everything else is left-associative -def assoc(s: String) : Assoc = s match { - case "^" => RA - case _ => LA -} - - -// the precedences of the operators -val precs = Map("+" -> 1, - "-" -> 1, - "*" -> 2, - "/" -> 2, - "^" -> 4) - -// the operations in the basic version of the algorithm -val ops = List("+", "-", "*", "/", "^") - -// (3) Implement the extended version of the shunting yard algorithm. -// This version should properly account for the fact that the power -// operation is right-associative. Apart from the extension to include -// the power operation, you can make the same assumptions as in -// basic version. - -def syard(toks: Toks, st: Toks = Nil, out: Toks = Nil) : Toks = ??? - - -// test cases -// syard(split("3 + 4 * 8 / ( 5 - 1 ) ^ 2 ^ 3")) // 3 4 8 * 5 1 - 2 3 ^ ^ / + - - -// (4) Implement a compute function that produces an Int for an -// input list of tokens in postfix notation. - -def compute(toks: Toks, st: List[Int] = Nil) : Int = ??? - - -// test cases -// compute(syard(split("3 + 4 * ( 2 - 1 )"))) // 7 -// compute(syard(split("10 + 12 * 33"))) // 406 -// compute(syard(split("( 5 + 7 ) * 2"))) // 24 -// compute(syard(split("5 + 7 / 2"))) // 8 -// compute(syard(split("5 * 7 / 2"))) // 17 -// compute(syard(split("9 + 24 / ( 7 - 3 )"))) // 15 -// compute(syard(split("4 ^ 3 ^ 2"))) // 262144 -// compute(syard(split("4 ^ ( 3 ^ 2 )"))) // 262144 -// compute(syard(split("( 4 ^ 3 ) ^ 2"))) // 4096 -// compute(syard(split("( 3 + 1 ) ^ 2 ^ 3"))) // 65536 - -}