pep-material: cws/pre_cw01.tex@d2f895c1dba6 (annotated)

276 5a8ef4dd6cc9 updated Christian Urban <urbanc@in.tum.de> parents: 266 diff changeset	1	% !TEX program = xelatex
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	2	\documentclass{article}
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	3	\usepackage{../style}
195 4bacbe753e66 updated Christian Urban <urbanc@in.tum.de> parents: 192 diff changeset	4	\usepackage{disclaimer}
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	5	\usepackage{../langs}
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	6
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	7
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	8
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	9	\begin{document}
aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	10
356 35c75b594459 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 355 diff changeset	11	\section*{Preliminary Part 1 (Scala, 3 Marks)}
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	12
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	13	\mbox{}\hfill\textit{``The most effective debugging tool is still careful thought,}\\
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	14	\mbox{}\hfill\textit{coupled with judiciously placed print statements.''}\smallskip\\
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	15	\mbox{}\hfill\textit{ --- Brian W. Kernighan, in Unix for Beginners (1979)}\bigskip
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	16
279 68b36b4efe82 updated Christian Urban <urbanc@in.tum.de> parents: 276 diff changeset	17
355 e4d779587d3f updated Christian Urban <christian.urban@kcl.ac.uk> parents: 345 diff changeset	18	\IMPORTANT{This part is about Scala. It is due on \cwSIX{} at 5pm and worth 3\%.
e4d779587d3f updated Christian Urban <christian.urban@kcl.ac.uk> parents: 345 diff changeset	19	Any 1\% you achieve in the preliminary part counts as your ``weekly engagement''.}
195 4bacbe753e66 updated Christian Urban <urbanc@in.tum.de> parents: 192 diff changeset	20
127 01d522ba48d4 updated Christian Urban <urbanc@in.tum.de> parents: 125 diff changeset	21	\noindent
195 4bacbe753e66 updated Christian Urban <urbanc@in.tum.de> parents: 192 diff changeset	22	Also note that the running time of each part will be restricted to a
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	23	maximum of 30 seconds on my laptop.
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	24
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	25	\DISCLAIMER{}
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	26
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	27	\subsection*{Reference Implementation}
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	28
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	29	Like the C++ assignments, the Scala assignments will work like this: you
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	30	push your files to GitHub and receive (after sometimes a long delay) some
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	31	automated feedback. In the end we take a snapshot of the submitted files and
266 31e5218f43de updated to 2.13 Christian Urban <urbanc@in.tum.de> parents: 252 diff changeset	32	apply an automated marking script to them.\medskip
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	33
266 31e5218f43de updated to 2.13 Christian Urban <urbanc@in.tum.de> parents: 252 diff changeset	34	\noindent
306 63d4301a8bc6 update Christian Urban <urbanc@in.tum.de> parents: 284 diff changeset	35	In addition, the Scala coursework comes with a reference implementation
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	36	in form of \texttt{jar}-files. This allows you to run any test cases on
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	37	your own computer. For example you can call Scala on the command line
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	38	with the option \texttt{-cp collatz.jar} and then query any function
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	39	from the template file. Say you want to find out what the functions
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	40	\texttt{collatz} and \texttt{collatz\_max} produce: for this you just
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	41	need to prefix them with the object name \texttt{CW6a}. If you want to
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	42	find out what these functions produce for the argument \texttt{6}, you
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	43	would type something like:
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	44
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	45	\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	46	$ scala -cp collatz.jar
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	47
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	48	scala> CW6a.collatz(6)
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	49	...
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	50	scala> CW6a.collatz_max(6)
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	51	...
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	52	\end{lstlisting}%$
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	53
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	54	\subsection*{Hints}
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	55
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	56	\noindent
345 69b39df6ad4d updated Christian Urban <christian.urban@kcl.ac.uk> parents: 344 diff changeset	57	\textbf{For the Preliminary Part:} useful math operators: \texttt{\%} for modulo, \texttt{\&} for bit-wise and; useful
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	58	functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt},
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	59	\texttt{.toList} for conversions, you can use \texttt{List(...).max} for the
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	60	maximum of a list, \texttt{List(...).indexOf(...)} for the first index of
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	61	a value in a list.\bigskip
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	62
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	63
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	64
dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	65	\newpage
279 68b36b4efe82 updated Christian Urban <urbanc@in.tum.de> parents: 276 diff changeset	66	\subsection*{Preliminary Part (3 Marks, file collatz.scala)}
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	67
336 cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	68	This part is about function definitions and recursion. You are asked
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	69	to implement a Scala program that tests examples of the
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	70	\emph{$3n + 1$-conjecture}, also called \emph{Collatz
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	71	conjecture}.\video{https://www.youtube.com./watch?v=LqKpkdRRLZw}
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	72	This conjecture can be described as follows: Start with any positive
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	73	number $n$ greater than $0$:
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	74
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	75	\begin{itemize}
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	76	\item If $n$ is even, divide it by $2$ to obtain $n / 2$.
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	77	\item If $n$ is odd, multiply it by $3$ and add $1$ to obtain $3n +
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	78	1$.
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	79	\item Repeat this process and you will always end up with $1$.
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	80	\end{itemize}
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	81
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	82	\noindent
266 31e5218f43de updated to 2.13 Christian Urban <urbanc@in.tum.de> parents: 252 diff changeset	83	For example if you start with, say, $6$ and $9$, you obtain the
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	84	two \emph{Collatz series}
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	85	%
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	86	\[
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	87	\begin{array}{@{}l@{\hspace{5mm}}l@{}}
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	88	6, 3, 10, 5, 16, 8, 4, 2, 1 & \text{(= 8 steps)}\\
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	89	9, 28, 14, 7, 22, 11, 34, 17, 52, 26, 13, 40, 20, 10, 5, 16, 8, 4, 2, 1 & \text{(= 19 steps)}\\
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	90	\end{array}
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	91	\]
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	92
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	93	\noindent
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	94	As you can see, the numbers go up and down like a roller-coaster, but
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	95	curiously they seem to always terminate in $1$. Nobody knows why. The
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	96	conjecture is that this will \emph{always} happen for every number
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	97	greater than 0.\footnote{While it is relatively easy to test this
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	98	conjecture with particular numbers, it is an interesting open problem to
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	99	\emph{prove} that the conjecture is true for \emph{all} numbers ($> 0$).
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	100	Paul Erd\"o{}s, a famous mathematician you might have heard about, said
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	101	about this conjecture: ``Mathematics may not [yet] be ready for such
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	102	problems.'' and also offered a \$500 cash prize for its solution.
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	103	Jeffrey Lagarias, another mathematician, claimed that based only on
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	104	known information about this problem, ``this is an extraordinarily
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	105	difficult problem, completely out of reach of present day mathematics.''
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	106	There is also a \href{https://xkcd.com/710/}{xkcd} cartoon about this
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	107	conjecture\here{https://xkcd.com/710/}). If you are able to solve this
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	108	conjecture, you will definitely get famous.}\bigskip
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	109
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	110	\noindent
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	111	\textbf{Tasks}
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	112
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	113	\begin{itemize}
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	114	\item[(1)] You are asked to implement a recursive function that
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	115	calculates the number of steps needed until a series ends
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	116	with $1$. In case of starting with $6$, it takes $8$ steps and in
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	117	case of starting with $9$, it takes $19$ (see above). We assume it
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	118	takes $0$ steps, if we start with $1$. In order to
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	119	try out this function with large numbers, you should use
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	120	\texttt{Long} as argument type, instead of \texttt{Int}. You can
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	121	assume this function will be called with numbers between $1$ and
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	122	$1$ Million. \hfill[1 Mark]
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	123
87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	124	\item[(2)] Write a second function that takes an upper bound as
282 2d290b79fc73 updated Christian Urban <urbanc@in.tum.de> parents: 279 diff changeset	125	an argument and calculates the steps for all numbers in the range from
210 34f935e13bdd updated Christian Urban <urbanc@in.tum.de> parents: 202 diff changeset	126	1 up to this bound (the bound including). It returns the maximum number of
34f935e13bdd updated Christian Urban <urbanc@in.tum.de> parents: 202 diff changeset	127	steps and the corresponding number that needs that many steps. More
34f935e13bdd updated Christian Urban <urbanc@in.tum.de> parents: 202 diff changeset	128	precisely it returns a pair where the first component is the number
34f935e13bdd updated Christian Urban <urbanc@in.tum.de> parents: 202 diff changeset	129	of steps and the second is the corresponding number. \hfill\mbox{[1
34f935e13bdd updated Christian Urban <urbanc@in.tum.de> parents: 202 diff changeset	130	Mark]}
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	131
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	132	\item[(3)] Write a function that calculates \emph{hard
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	133	numbers} \here{https://medium.com/cantors-paradise/the-collatz-conjecture-some-shocking-results-from-180-000-iterations-7fea130d0377}
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	134	in the Collatz series---these are the last odd numbers just before a
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	135	power of two is reached. For this, implement an
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	136	\textit{is-power-of-two} function which tests whether a number is a
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	137	power of two. The easiest way to implement this is by using the
336 cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	138	bit-operator $\&$ of Scala. For a power of two, say $n$ with $n > 0$, it
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	139	holds that $n \;\&\; (n - 1)$ is equal to zero. I let you think why
344 8686ae7174c3 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 343 diff changeset	140	this is the case.
8686ae7174c3 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 343 diff changeset	141
8686ae7174c3 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 343 diff changeset	142	The function \textit{is-hard} calculates whether
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	143	$3n + 1$ is a power of two. Finally the \textit{last-odd} function
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	144	calculates the last odd number before a power of 2 in the Collatz
344 8686ae7174c3 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 343 diff changeset	145	series. This means for example when starting with 9,
335 9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	146	we receive 5 as the last odd number. Surprisingly a lot of numbers
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	147	have 5 as last-odd number. But for example for 113 we obtain 85,
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	148	because of the series
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	149	%
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	150	\[113, 340, 170, \,\fbox{85}\,, 256, 128, 64, 32, 16, 8, 4, 2, 1\]
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	151
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	152	The \textit{last-odd} function will only be called with numbers that are not
9476aee44eed updated Christian Urban <christian.urban@kcl.ac.uk> parents: 315 diff changeset	153	powers of 2 themselves.
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	154	\end{itemize}
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	155
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	156	\noindent
336 cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	157	\textbf{Test Data:} Some test ranges and cases are:
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	158
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	159	\begin{itemize}
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	160	\item 1 to 10 where $9$ takes 19 steps
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	161	\item 1 to 100 where $97$ takes 118 steps,
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	162	\item 1 to 1,000 where $871$ takes 178 steps,
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	163	\item 1 to 10,000 where $6,171$ takes 261 steps,
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	164	\item 1 to 100,000 where $77,031$ takes 350 steps,
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	165	\item 1 to 1 Million where $837,799$ takes 524 steps
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	166	%% runs out of stack space
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	167	%% \item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 685 steps
336 cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	168	\item 21 is the last odd number for 84
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	169	\item 341 is the last odd number for 201, 604, 605 and 8600
cccdae0fccc7 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 335 diff changeset	170
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	171	\end{itemize}
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	172
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	173
127 01d522ba48d4 updated Christian Urban <urbanc@in.tum.de> parents: 125 diff changeset	174
01d522ba48d4 updated Christian Urban <urbanc@in.tum.de> parents: 125 diff changeset	175
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	176
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	177
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	178	\end{document}
2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	179
276 5a8ef4dd6cc9 updated Christian Urban <urbanc@in.tum.de> parents: 266 diff changeset	180
5a8ef4dd6cc9 updated Christian Urban <urbanc@in.tum.de> parents: 266 diff changeset	181	%%%%%%% Historical Stuff
199 2e13dedd922e updated Christian Urban <urbanc@in.tum.de> parents: 197 diff changeset	182	\newpage
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	183
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	184	This part is about web-scraping and list-processing in Scala. It uses
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	185	online data about the per-capita alcohol consumption for each country
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	186	(per year?), and a file containing the data about the population size of
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	187	each country. From this data you are supposed to estimate how many
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	188	litres of pure alcohol are consumed worldwide.\bigskip
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	189
cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	190	\noindent
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	191	\textbf{Tasks (file alcohol.scala):}
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	192
cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	193	\begin{itemize}
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	194	\item[(1)] Write a function that given an URL requests a
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	195	comma-separated value (CSV) list. We are interested in the list
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	196	from the following URL
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	197
cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	198	\begin{center}
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	199	\url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv}
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	200	\end{center}
127 01d522ba48d4 updated Christian Urban <urbanc@in.tum.de> parents: 125 diff changeset	201
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	202	\noindent Your function should take a string (the URL) as input, and
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	203	produce a list of strings as output, where each string is one line in
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	204	the corresponding CSV-list. This list from the URL above should
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	205	contain 194 lines.\medskip
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	206
cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	207	\noindent
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	208	Write another function that can read the file \texttt{population.csv}
201 dc505025ce15 updated Christian Urban <urbanc@in.tum.de> parents: 199 diff changeset	209	from disk (the file is distributed with the assignment). This
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	210	function should take a string as argument, the file name, and again
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	211	return a list of strings corresponding to each entry in the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	212	CSV-list. For \texttt{population.csv}, this list should contain 216
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	213	lines.\hfill[1 Mark]
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	214
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	215
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	216	\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	217	need to extract the data that interests us. From the header of the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	218	alcohol list, you can see there are 5 columns
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	219
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	220	\begin{center}
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	221	\begin{tabular}{l}
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	222	\texttt{country (name),}\\
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	223	\texttt{beer\_servings,}\\
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	224	\texttt{spirit\_servings,}\\
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	225	\texttt{wine\_servings,}\\
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	226	\texttt{total\_litres\_of\_pure\_alcohol}
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	227	\end{tabular}
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	228	\end{center}
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	229
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	230	\noindent
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	231	Write a function that extracts the data from the first column,
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	232	the country name, and the data from the fifth column (converted into
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	233	a \texttt{Double}). For this go through each line of the CSV-list
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	234	(except the first line), use the \texttt{split(",")} function to
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	235	divide each line into an array of 5 elements. Keep the data from the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	236	first and fifth element in these arrays.\medskip
192 cd2a9c969ef2 updated Christian Urban <urbanc@in.tum.de> parents: 167 diff changeset	237
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	238	\noindent
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	239	Write another function that processes the population size list. This
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	240	is already of the form country name and population size.\footnote{Your
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	241	friendly lecturer already did the messy processing for you from the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	242	Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	243	strings according to the commas. However, this time generate a
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	244	\texttt{Map} from country names to population sizes.\hfill[1 Mark]
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	245
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	246	\item[(3)] In (2) you generated the data about the alcohol consumption
343 51e25cc30483 updated Christian Urban <christian.urban@kcl.ac.uk> parents: 336 diff changeset	247	per-capita for each country, and also the population size for each
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	248	country. From this generate next a sorted(!) list of the overall
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	249	alcohol consumption for each country. The list should be sorted from
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	250	highest alcohol consumption to lowest. The difficulty is that the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	251	data is scraped off from ``random'' sources on the Internet and
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	252	annoyingly the spelling of some country names does not always agree in both
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	253	lists. For example the alcohol list contains
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	254	\texttt{Bosnia-Herzegovina}, while the population writes this country as
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	255	\texttt{Bosnia and Herzegovina}. In your sorted
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	256	overall list include only countries from the alcohol list, whose
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	257	exact country name is also in the population size list. This means
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	258	you can ignore countries like Bosnia-Herzegovina from the overall
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	259	alcohol consumption. There are 177 countries where the names
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	260	agree. The UK is ranked 10th on this list by
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	261	consuming 671,976,864 Litres of pure alcohol each year.\medskip
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	262
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	263	\noindent
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	264	Finally, write another function that takes an integer, say
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	265	\texttt{n}, as argument. You can assume this integer is between 0
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	266	and 177 (the number of countries in the sorted list above). The
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	267	function should return a triple, where the first component is the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	268	sum of the alcohol consumption in all countries (on the list); the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	269	second component is the sum of the \texttt{n}-highest alcohol
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	270	consumers on the list; and the third component is the percentage the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	271	\texttt{n}-highest alcohol consumers drink with respect to the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	272	the world consumption. You will see that according to our data, 164
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	273	countries (out of 177) gobble up 100\% of the World alcohol
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	274	consumption.\hfill\mbox{[1 Mark]}
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	275	\end{itemize}
11 417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	276
417869f65585 updated Christian Urban <christian dot urban at kcl dot ac dot uk> parents: 9 diff changeset	277	\noindent
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	278	\textbf{Hints:} useful list functions: \texttt{.drop(n)},
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	279	\texttt{.take(n)} for dropping or taking some elements in a list,
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	280	\texttt{.getLines} for separating lines in a string;
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	281	\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	282	elements in the pairs---the sorting is done from smallest to highest;
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	283	useful \texttt{Map} functions: \texttt{.toMap} converts a list of
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	284	pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	285	map is defined at that key, that is would produce a result when
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	286	called with this key; useful data functions: \texttt{Source.fromURL},
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	287	\texttt{Source.fromFile} for obtaining a webpage and reading a file.
127 01d522ba48d4 updated Christian Urban <urbanc@in.tum.de> parents: 125 diff changeset	288
196 4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	289	\newpage
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	290
4973c2fb3c66 updated Christian Urban <urbanc@in.tum.de> parents: 195 diff changeset	291
18 87e55eb309ed updated Christian Urban <urbanc@in.tum.de> parents: 11 diff changeset	292
129 1b0f1573c27c updated Christian Urban <urbanc@in.tum.de> parents: 127 diff changeset	293
135 e59aebee9770 updated Christian Urban <urbanc@in.tum.de> parents: 129 diff changeset	294
6 aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	295
aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	296	%%% Local Variables:
aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	297	%%% mode: latex
aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	298	%%% TeX-master: t
aae256985251 updated Christian Urban <urbanc@in.tum.de> parents: diff changeset	299	%%% End:

author	Christian Urban <christian.urban@kcl.ac.uk>
	Tue, 24 Nov 2020 09:04:06 +0000
changeset 366	d2f895c1dba6
parent 356	35c75b594459
permissions	-rw-r--r--