--- a/cws/cw01.tex Wed Jul 04 14:48:05 2018 +0100
+++ b/cws/cw01.tex Fri Oct 05 11:23:15 2018 +0100
@@ -1,6 +1,5 @@
\documentclass{article}
\usepackage{../style}
-\usepackage{disclaimer}
%%\usepackage{../langs}
\begin{document}
@@ -8,19 +7,30 @@
\section*{Coursework 6 (Scala)}
This coursework is about Scala and is worth 10\%. The first and second
-part are due on 16 November at 11pm, and the third part on 21 December
+part are due on 16 November at 11pm, and the third part on 23 November
at 11pm. You are asked to implement three programs about list
processing and recursion. The third part is more advanced and might
include material you have not yet seen in the first lecture.
-\bigskip
-
-\IMPORTANT{}
+Make sure the files you submit can be processed by just calling
+\texttt{scala <<filename.scala>>}.\bigskip
\noindent
-Also note that the running time of each part will be restricted to a
-maximum of 360 seconds on my laptop.
+\textbf{Important:} Do not use any mutable data structures in your
+submissions! They are not needed. This means you cannot use
+\texttt{ListBuffer}s, for example. Do not use \texttt{return} in your
+code! It has a different meaning in Scala, than in Java.
+Do not use \texttt{var}! This declares a mutable variable. Make sure the
+functions you submit are defined on the ``top-level'' of Scala, not
+inside a class or object. Also note that the running time of
+each part will be restricted to a maximum of 360 seconds on my laptop.
-\DISCLAIMER{}
+
+\subsection*{Disclaimer}
+
+It should be understood that the work you submit represents
+your own effort. You have not copied from anyone else. An
+exception is the Scala code I showed during the lectures or
+uploaded to KEATS, which you can freely use.\bigskip
\subsection*{Part 1 (3 Marks)}
@@ -66,6 +76,7 @@
(click \href{https://xkcd.com/710/}{here}). If you are able to solve
this conjecture, you will definitely get famous.}\bigskip
+\newpage
\noindent
\textbf{Tasks (file collatz.scala):}
@@ -77,7 +88,7 @@
try out this function with large numbers, you should use
\texttt{Long} as argument type, instead of \texttt{Int}. You can
assume this function will be called with numbers between $1$ and
- $1$ Million. \hfill[2 Marks]
+ $1$ million. \hfill[2 Marks]
\item[(2)] Write a second function that takes an upper bound as
argument and calculates the steps for all numbers in the range from
@@ -97,129 +108,93 @@
\item 1 to 1,000 where $871$ takes 179 steps,
\item 1 to 10,000 where $6,171$ takes 262 steps,
\item 1 to 100,000 where $77,031$ takes 351 steps,
-\item 1 to 1 Million where $837,799$ takes 525 steps
+\item 1 to 1 million where $837,799$ takes 525 steps
%%\item[$\bullet$] $1 - 10$ million where $8,400,511$ takes 686 steps
-\end{itemize}
+\end{itemize}\bigskip
-\noindent
-\textbf{Hints:} useful math operators: \texttt{\%} for modulo; useful
-functions: \mbox{\texttt{(1\,to\,10)}} for ranges, \texttt{.toInt},
-\texttt{.toList} for conversions, \texttt{List(...).max} for the
-maximum of a list, \texttt{List(...).indexOf(...)} for the first index of
-a value in a list.
-
-
-
-\subsection*{Part 2 (3 Marks)}
-
-This part is about web-scraping and list-processing in Scala. It uses
-online data about the per-capita alcohol consumption for each country
-(per year?), and a file containing the data about the population size of
-each country. From this data you are supposed to estimate how many
-litres of pure alcohol are consumed worldwide.\bigskip
-
-\noindent
-\textbf{Tasks (file alcohol.scala):}
-
-\begin{itemize}
-\item[(1)] Write a function that given an URL requests a
- comma-separated value (CSV) list. We are interested in the list
- from the following URL
-
-\begin{center}
- \url{https://raw.githubusercontent.com/fivethirtyeight/data/master/alcohol-consumption/drinks.csv}
-\end{center}
-
-\noindent Your function should take a string (the URL) as input, and
-produce a list of strings as output, where each string is one line in
-the corresponding CSV-list. This list from the URL above should
-contain 194 lines.\medskip
-
-\noindent
-Write another function that can read the file \texttt{population.csv}
-from disk (the file is distributed with the coursework). This
-function should take a string as argument, the file name, and again
-return a list of strings corresponding to each entry in the
-CSV-list. For \texttt{population.csv}, this list should contain 216
-lines.\hfill[1 Mark]
-\item[(2)] Unfortunately, the CSV-lists contain a lot of ``junk'' and we
- need to extract the data that interests us. From the header of the
- alcohol list, you can see there are 5 columns
-
- \begin{center}
- \begin{tabular}{l}
- \texttt{country (name),}\\
- \texttt{beer\_servings,}\\
- \texttt{spirit\_servings,}\\
- \texttt{wine\_servings,}\\
- \texttt{total\_litres\_of\_pure\_alcohol}
- \end{tabular}
- \end{center}
+\subsection*{Part 2 (4 Marks)}
+
+This part is about list processing---it's a variant of
+``buy-low-sell-high'' in Scala. It uses the online financial data
+service from Yahoo.\bigskip
+
+\noindent
+\textbf{Tasks (file trade.scala):}
+
+\begin{itemize}
+\item[(1)] Given a list of prices for a commodity, for example
- \noindent
- Write a function that extracts the data from the first column,
- the country name, and the data from the fifth column (converted into
- a \texttt{Double}). For this go through each line of the CSV-list
- (except the first line), use the \texttt{split(",")} function to
- divide each line into an array of 5 elements. Keep the data from the
- first and fifth element in these arrays.\medskip
+\[
+\texttt{List(28.0, 18.0, 20.0, 26.0, 24.0)}
+\]
- \noindent
- Write another function that processes the population size list. This
- is already of the form country name and population size.\footnote{Your
- friendly lecturer already did the messy processing for you from the
- Worldbank database, see \url{https://github.com/datasets/population/tree/master/data} for the original.} Again, split the
- strings according to the commas. However, this time generate a
- \texttt{Map} from country names to population sizes.\hfill[1 Mark]
+\noindent
+you need to write a function that returns a pair of indices for when
+to buy and when to sell this commodity. In the example above it should
+return the pair $\texttt{(1, 3)}$ because at index $1$ the price is lowest and
+then at index $3$ the price is highest. Note the prices are given as
+lists of \texttt{Double}s.\newline \mbox{} \hfill[1 Mark]
+
+\item[(2)] Write a function that requests a comma-separated value (CSV) list
+ from the Yahoo websevice that provides historical data for stock
+ indices. For example if you query the URL
+
+\begin{center}
+\url{http://ichart.yahoo.com/table.csv?s=GOOG}
+\end{center}
-\item[(3)] In (2) you generated the data about the alcohol consumption
- per capita for each country, and also the population size for each
- country. From this generate next a sorted(!) list of the overall
- alcohol consumption for each country. The list should be sorted from
- highest alcohol consumption to lowest. The difficulty is that the
- data is scraped off from ``random'' sources on the Internet and
- annoyingly the spelling of some country names does not always agree in both
- lists. For example the alcohol list contains
- \texttt{Bosnia-Herzegovina}, while the population writes this country as
- \texttt{Bosnia and Herzegovina}. In your sorted
- overall list include only countries from the alcohol list, whose
- exact country name is also in the population size list. This means
- you can ignore countries like Bosnia-Herzegovina from the overall
- alcohol consumption. There are 177 countries where the names
- agree. The UK is ranked 10th on this list by
- consuming 671,976,864 Litres of pure alcohol each year.\medskip
-
- \noindent
- Finally, write another function that takes an integer, say
- \texttt{n}, as argument. You can assume this integer is between 0
- and 177 (the number of countries in the sorted list above). The
- function should return a triple, where the first component is the
- sum of the alcohol consumption in all countries (on the list); the
- second component is the sum of the \texttt{n}-highest alcohol
- consumers on the list; and the third component is the percentage the
- \texttt{n}-highest alcohol consumers drink with respect to the
- the world consumption. You will see that according to our data, 164
- countries (out of 177) gobble up 100\% of the World alcohol
- consumption.\hfill\mbox{[1 Mark]}
+\noindent where \texttt{GOOG} stands for Google's stock market symbol,
+then you will receive a CSV-list of the daily stock prices since
+Google was listed. You can also try this with other stock market
+symbols, for instance AAPL, MSFT, IBM, FB, YHOO, AMZN, BIDU and so
+on.
+
+This function should return a List of strings, where each string
+is one line in this CVS-list (representing one day's
+data). Note that Yahoo generates its answer such that the newest data
+is at the front of this list, and the oldest data is at the end.
+\hfill[1 Mark]
+
+\item[(3)] As you can see, the financial data from Yahoo is organised in 7 columns,
+for example
+
+{\small\begin{verbatim}
+Date,Open,High,Low,Close,Volume,Adj Close
+2016-11-04,750.659973,770.359985,750.560974,762.02002,2126900,762.02002
+2016-11-03,767.25,769.950012,759.030029,762.130005,1914000,762.130005
+2016-11-02,778.200012,781.650024,763.450012,768.700012,1872400,768.700012
+2016-11-01,782.890015,789.48999,775.539978,783.609985,2404500,783.609985
+....
+\end{verbatim}}
+
+\noindent
+Write a function that ignores the first line (the header) and then
+extracts from each line the date (first column) and the Adjusted Close
+price (last column). The Adjusted Close price should be converted into
+a \texttt{Double}. So the result of this function is a list of pairs where the
+first components are strings (the dates) and the second are doubles
+(the adjusted close prices).\newline\mbox{}\hfill\mbox{[1 Mark]}
+
+\item[(4)] Write a function that takes a stock market symbol as
+ argument (you can assume it is a valid one, like GOOG or AAPL). The
+ function calculates the \underline{dates} when you should have
+ bought the corresponding shares (lowest price) and when you should
+ have sold them (highest price).\hfill\mbox{[1 Mark]}
\end{itemize}
\noindent
-\textbf{Hints:} useful list functions: \texttt{.drop(n)},
-\texttt{.take(n)} for dropping or taking some elements in a list,
-\texttt{.getLines} for separating lines in a string;
-\texttt{.sortBy(\_.\_2)} sorts a list of pairs according to the second
-elements in the pairs---the sorting is done from smallest to highest;
-useful \texttt{Map} functions: \texttt{.toMap} converts a list of
-pairs into a \texttt{Map}, \texttt{.isDefinedAt(k)} tests whether the
-map is defined at that key, that is would produce a result when
-called with this key; useful data functions: \texttt{Source.fromURL},
-\texttt{Source.fromFile} for obtaining a webpage and reading a file.
+\textbf{Test Data:}
+In case of Google, the financial data records 3077 entries starting
+from 2004-08-19 until 2016-11-04 (which is the last entry on the day
+when I prepared the course work...namely on 6 November; remember stock
+markets are typically closed on weekends and no financial data is
+produced then; also I did not count the header line). The lowest
+shareprice for Google was on 2004-09-03 with \$49.95513 per share and the
+highest on 2016-10-24 with \$813.109985 per share.\bigskip
-\newpage
-
-\subsection*{Advanced Part 3 (4 Marks)}
+\subsection*{Advanced Part 3 (3 Marks)}
A purely fictional character named Mr T.~Drumb inherited in 1978
approximately 200 Million Dollar from his father. Mr Drumb prides
@@ -227,10 +202,10 @@
estimated he is 3 Billion Dollar worth (one is not sure, of course,
because Mr Drumb refuses to make his tax records public).
-Since the question about Mr Drumb's business acumen remains open,
-let's do a quick back-of-the-envelope calculation in Scala whether his
-claim has any merit. Let's suppose we are given \$100 in 1978 and we
-follow a really dumb investment strategy, namely:
+Since the question about Mr Drumb's business acumen remains, let's do a
+quick back-of-the-envelope calculation in Scala whether his claim has
+any merit. Let's suppose we are given \$100 in 1978 and we follow a
+really dumb investment strategy, namely:
\begin{itemize}
\item We blindly choose a portfolio of stocks, say some Blue-Chip stocks
@@ -244,60 +219,50 @@
everything, and re-invest our (hopefully) increased money in again
the stocks from our portfolio (there might be more stocks available,
if companies from our portfolio got listed in that year, or less if
- some companies went bust or were de-listed).
-\item We do this for 39 years until January 2017 and check what would
+ some companies went bust or de-listed).
+\item We do this for 38 years until January 2016 and check what would
have become out of our \$100.
-\end{itemize}
-
-\noindent
-Until Yahoo was bought by Altaba this summer, historical stock market
-data for such back-of-the-envelope calculations was freely available
-online. Unfortuantely nowadays this kind of data is difficult to
-obtain, unless you are prepared to pay extortionate prices or be
-severely rate-limited. Therefore this coursework comes with a number
-of files containing CSV-lists with the historical stock prices for the
-companies in our portfolios. Use these files for the following
-tasks.\bigskip
+\end{itemize}\medskip
\noindent
\textbf{Tasks (file drumb.scala):}
\begin{itemize}
-\item[(1.a)] Write a function \texttt{get\_january\_data} that takes a
- stock symbol and a year as arguments. The function reads the
- corresponding CSV-file and returns the list of strings that start
- with the given year (each line in the CSV-list is of the form
- \texttt{year-01-someday,someprice}).
+\item[(1.a)] Write a function that queries the Yahoo financial data
+ service and obtains the first trade (adjusted close price) of a
+ stock symbol and a year. A problem is that normally a stock exchange
+ is not open on 1st of January, but depending on the day of the week
+ on a later day (maybe 3rd or 4th). The easiest way to solve this
+ problem is to obtain the whole January data for a stock symbol as
+ CSV-list and then select the earliest entry in this list. For this
+ you can specify a date range with the Yahoo service. For example if
+ you want to obtain all January data for Google in 2000, you can form
+ the query:\mbox{}\\[-8mm]
+
+ \begin{center}\small
+ \mbox{\url{http://ichart.yahoo.com/table.csv?s=GOOG&a=0&b=1&c=2000&d=1&e=1&f=2000}}
+ \end{center}
-\item[(1.b)] Write a function \texttt{get\_first\_price} that takes
- again a stock symbol and a year as arguments. It should return the
- first January price for the stock symbol in given the year. For this
- it uses the list of strings generated by
- \texttt{get\_january\_data}. A problem is that normally a stock
- exchange is not open on 1st of January, but depending on the day of
- the week on a later day (maybe 3rd or 4th). The easiest way to solve
- this problem is to obtain the whole January data for a stock symbol
- and then select the earliest, or first, entry in this list. The
- stock price of this entry should be converted into a double. Such a
- price might not exist, in case the company does not exist in the given
- year. For example, if you query for Google in January of 1980, then
- clearly Google did not exist yet. Therefore you are asked to
- return a trade price as \texttt{Option[Double]}\ldots\texttt{None}
- will be the value for when no price exists.
+ For other companies and years, you need to change the stock symbol
+ (\texttt{GOOG}) and the year \texttt{2000} (in the \texttt{c} and
+ \texttt{f} argument of the query). Such a request might fail, if the
+ company does not exist during this period. For example, if you query
+ for Google in January of 1980, then clearly Google did not exists yet.
+ Therefore you are asked to return a trade price as
+ \texttt{Option[Double]}.
-\item[(1.c)] Write a function \texttt{get\_prices} that takes a
- portfolio (a list of stock symbols), a years range and gets all the
- first trading prices for each year in the range. You should organise
- this as a list of lists of \texttt{Option[Double]}'s. The inner
- lists are for all stock symbols from the portfolio and the outer
- list for the years. For example for Google and Apple in years 2010
- (first line), 2011 (second line) and 2012 (third line) you obtain:
+\item[(1.b)] Write a function that takes a portfolio (a list of stock symbols),
+ a years range and gets all the first trading prices for each year. You should
+ organise this as a list of lists of \texttt{Option[Double]}'s. The inner lists
+ are for all stock symbols from the portfolio and the outer list for the years.
+ For example for Google and Apple in years 2010 (first line), 2011
+ (second line) and 2012 (third line) you obtain:
\begin{verbatim}
- List(List(Some(311.349976), Some(27.505054)),
- List(Some(300.222351), Some(42.357094)),
- List(Some(330.555054), Some(52.852215)))
-\end{verbatim}\hfill[2 Marks]
+ List(List(Some(313.062468), Some(27.847252)),
+ List(Some(301.873641), Some(42.884065)),
+ List(Some(332.373186), Some(53.509768)))
+\end{verbatim}\hfill[1 Mark]
\item[(2.a)] Write a function that calculates the \emph{change factor} (delta)
for how a stock price has changed from one year to the next. This is
@@ -308,9 +273,6 @@
\frac{price_{new} - price_{old}}{price_{old}}
\]
- If the change factor is defined, you should return it
- as \texttt{Some(change factor)}; if not, you should return
- \texttt{None}.
\item[(2.b)] Write a function that calculates all change factors
(deltas) for the prices we obtained under Task 1. For the running
@@ -318,15 +280,12 @@
obtain 4 change factors:
\begin{verbatim}
- List(List(Some(-0.03573992567129673), Some(0.5399749442411563))
- List(Some(0.10103412653643493), Some(0.2477771728154912)))
+ List(List(Some(-0.03573991820699504), Some(0.5399747522663995))
+ List(Some(0.10103414428290529), Some(0.24777742035415723)))
\end{verbatim}
That means Google did a bit badly in 2010, while Apple did very well.
- Both did OK in 2011. Make sure you handle the cases where a company is
- not listed in a year. In such cases the change factor should be \texttt{None}
- (see 2.a).\\
- \mbox{}\hfill\mbox{[1 Mark]}
+ Both did OK in 2011.\hfill\mbox{[1 Mark]}
\item[(3.a)] Write a function that calculates the ``yield'', or
balance, for one year for our portfolio. This function takes the
@@ -335,11 +294,11 @@
unchanged. Otherwise we invest in each existing company an equal
amount of our balance. Using the change factors computed under Task
2, calculate the new balance. Say we had \$100 in 2010, we would have
- received in our running example involving Google and Apple:
+ received in our running example
\begin{verbatim}
- $50 * -0.03573992567129673 + $50 * 0.5399749442411563
- = $25.21175092849298
+ $50 * -0.03573991820699504 + $50 * 0.5399747522663995
+ = $25.211741702970222
\end{verbatim}
as profit for that year, and our new balance for 2011 is \$125 when
@@ -347,32 +306,16 @@
\item[(3.b)] Write a function that calculates the overall balance
for a range of years where each year the yearly profit is compounded to
- the new balances and then re-invested into our portfolio.\\
- \mbox{}\hfill\mbox{[1 Mark]}
+ the new balances and then re-invested into our portfolio.\mbox{}\hfill\mbox{[1 Mark]}
\end{itemize}\medskip
\noindent
\textbf{Test Data:} File \texttt{drumb.scala} contains two portfolios
collected from the S\&P 500, one for blue-chip companies, including
-Facebook, Amazon and Baidu; and another for listed real-estate
-companies, whose names I have never heard of. Following the dumb
-investment strategy from 1978 until 2017 would have turned a starting
-balance of \$100 into roughly \$30,895 for real estate and a whopping
-\$349,597 for blue chips. Note when comparing these results with your
-own calculations: there might be some small rounding errors, which
-when compounded lead to moderately different values.\bigskip
-
-\noindent
-\textbf{Hints:} useful string functions: \texttt{.startsWith(...)} for
-checking whether a string has a given prefix, \texttt{\_ ++ \_} for
-concatenating two strings; useful option functions: \texttt{.flatten}
-flattens a list of options such that it filters way all
-\texttt{None}'s, \texttt{Try(...) getOrElse ...} runs some code that
-might raise an exception---if yes, then a default value can be given;
-useful list functions: \texttt{.head} for obtaining the first element
-in a non-empty list, \texttt{.length} for the length of a
-list.\bigskip
-
+Facebook, Amazon and Baidu; and another for listed real-estate companies, whose
+names I have never heard of. Following the dumb investment strategy
+from 1978 until 2016 would have turned a starting balance of \$100
+into \$23,794 for real estate and a whopping \$524,609 for blue chips.\medskip
\noindent
\textbf{Moral:} Reflecting on our assumptions, we are over-estimating
@@ -382,9 +325,11 @@
of companies that went bust or were de-listed over the years.
So where does this leave our fictional character Mr T.~Drumb? Well, given
his inheritance, a really dumb investment strategy would have done
-equally well, if not much better.\medskip
+equally well, if not much better.
+About rounding errors: \url{https://www.youtube.com/watch?v=pQs_wx8eoQ8}
+(PBS Infinity Series).
\end{document}
%%% Local Variables:
--- a/progs/lecture2.scala Wed Jul 04 14:48:05 2018 +0100
+++ b/progs/lecture2.scala Fri Oct 05 11:23:15 2018 +0100
@@ -2,102 +2,34 @@
//=================
-// the pain with overloaded math operations
-
-(100 / 4)
-
-(100 / 3)
-
-(100.toDouble / 3.toDouble)
-
-
-// For-Comprehensions again
-//==========================
-
-def square(n: Int) : Int = n * n
-
-for (n <- (1 to 10).toList) yield {
- val res = square(n)
- res
-}
-
-// like in functions, the "last" item inside the yield
-// will be returned; the last item is not necessarily
-// the last line
-
-for (n <- (1 to 10).toList) yield {
- if (n % 2 == 0) n
- else square(n)
-}
-
-
-// ...please, please do not write:
-val lst = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
-
-for (i <- (0 until lst.length).toList) yield square(lst(i))
-
-// this is just so prone to off-by-one errors;
-// write instead
-
-for (e <- lst; if (e % 2) == 0; if (e != 4)) yield square(e)
-
-
-//this works for sets as well
-val st = Set(1, 2, 3, 4, 5, 6, 7, 8, 9)
-
-for (e <- st) yield {
- if (e < 5) e else square(e)
-}
-
-
-
-// Side-Effects
-//==============
-
-// with only a side-effect (no list is produced),
-// for has no "yield"
-
-for (n <- (1 to 10)) println(n)
-
-
-for (n <- (1 to 10)) {
- print("The number is: ")
- print(n)
- print("\n")
-}
-
-
-
-
-// know when to use yield and when not:
-
-val test =
- for (e <- Set(1, 2, 3, 4, 5, 6, 7, 8, 9); if e < 5) yield square(e)
-
-
-
// Option type
//=============
-//in Java, if something unusually happens, you return null;
+//in Java if something unusually happens, you return null;
//in Scala you use Option
// - if the value is present, you use Some(value)
// - if no value is present, you use None
-List(7,24,3,4,5,6).find(_ < 4)
+List(7,2,3,4,5,6).find(_ < 4)
List(5,6,7,8,9).find(_ < 4)
-List(7,2,3,4,5,6).filter(_ < 4)
-// some operations on Option's
+// Values in types
+//
+// Boolean:
+// Int:
+// String:
+//
+// Option[String]:
+//
+
val lst = List(None, Some(1), Some(2), None, Some(3))
lst.flatten
-Some(10).get
-None.get
+Some(1).get
Some(1).isDefined
None.isDefined
@@ -108,55 +40,38 @@
if (y == 0) None else Some(x / y)
}
-// use .getOrElse is for setting a default value
+// getOrElse is for setting a default value
val lst = List(None, Some(1), Some(2), None, Some(3))
-
for (x <- lst) yield x.getOrElse(0)
-// error handling with Options (no exceptions)
-//
-// Try(....)
+// error handling with Option (no exceptions)
//
// Try(something).getOrElse(what_to_do_in_an_exception)
//
import scala.util._
-
-Try(1 + 3)
-Try(9 / 0)
-
-Try(9 / 3).getOrElse(42)
-Try(9 / 0).getOrElse(42)
-
-
import io.Source
-val my_url = """https://nms.kcl.ac.uk/christian.urban"""
-//val my_url = """https://nms.kcl.ac.uk/christan.urban""" // misspelled
-
-Source.fromURL(my_url).mkString
+Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString
-Try(Source.fromURL(my_url).mkString).getOrElse("")
+Try(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString).getOrElse("")
-Try(Some(Source.fromURL(my_url).mkString)).getOrElse(None)
-
+Try(Some(Source.fromURL("""http://www.inf.kcl.ac.uk/staff/urbanc/""").mkString)).getOrElse(None)
// a function that turns strings into numbers
-Integer.parseInt("1234")
-
+Integer.parseInt("12u34")
def get_me_an_int(s: String): Option[Int] =
Try(Some(Integer.parseInt(s))).getOrElse(None)
val lst = List("12345", "foo", "5432", "bar", "x21")
-
for (x <- lst) yield get_me_an_int(x)
// summing all the numbers
-val sum = (for (i <- lst) yield get_me_an_int(i)).flatten.sum
+val sum = lst.flatMap(get_me_an_int(_)).sum
// This may not look any better than working with null in Java, but to
@@ -165,31 +80,215 @@
// write that function.
//
// In Java, if you didn't write this function, you'd have to depend on
-// the Javadoc of get_me_an_int. If you didn't look at the Javadoc,
+// the Javadoc of the get_me_an_int. If you didn't look at the Javadoc,
// you might not know that get_me_an_int could return a null, and your
// code could potentially throw a NullPointerException.
+
// even Scala is not immune to problems like this:
-List(5,6,7,8,9).indexOf(42)
+List(5,6,7,8,9).indexOf(7)
+
+
+
+
+
+// Type abbreviations
+//====================
+
+// some syntactic convenience
+type Pos = (int, Int)
+
+type Board = List[List[Int]]
+
+
+
+// Implicits
+//===========
+//
+// for example adding your own methods to Strings:
+// imagine you want to increment strings, like
+//
+// "HAL".increment
+//
+// you can avoid ugly fudges, like a MyString, by
+// using implicit conversions
+
+
+implicit class MyString(s: String) {
+ def increment = for (c <- s) yield (c + 1).toChar
+}
+
+"HAL".increment
-// ... how are we supposed to know that this returns -1
+// No return in Scala
+//====================
+
+//You should not use "return" in Scala:
+//
+// A return expression, when evaluated, abandons the
+// current computation and returns to the caller of the
+// function in which return appears."
+
+def sq1(x: Int): Int = x * x
+def sq2(x: Int): Int = return x * x
+
+def sumq(ls: List[Int]): Int = {
+ (for (x <- ls) yield (return x * x)).sum[Int]
+}
+
+sumq(List(1,2,3,4))
+
+
+// last expression in a function is the return statement
+def square(x: Int): Int = {
+ println(s"The argument is ${x}.")
+ x * x
+}
+
+
+
+// Pattern Matching
+//==================
+
+// A powerful tool which is supposed to come to Java in a few years
+// time (https://www.youtube.com/watch?v=oGll155-vuQ)...Scala already
+// has it for many years ;o)
+
+// The general schema:
+//
+// expression match {
+// case pattern1 => expression1
+// case pattern2 => expression2
+// ...
+// case patternN => expressionN
+// }
+
+
+
+
+// remember
+val lst = List(None, Some(1), Some(2), None, Some(3)).flatten
+
+
+def my_flatten(xs: List[Option[Int]]): List[Int] = {
+ ...
+}
+
+
+def my_flatten(lst: List[Option[Int]]): List[Int] = lst match {
+ case Nil => Nil
+ case None::xs => my_flatten(xs)
+ case Some(n)::xs => n::my_flatten(xs)
+}
-//other example for options...NaN
-val squareRoot: PartialFunction[Double, Double] = {
- case d: Double if d > 0 => Math.sqrt(d)
+// another example
+def get_me_a_string(n: Int): String = n match {
+ case 0 => "zero"
+ case 1 => "one"
+ case 2 => "two"
+ case _ => "many"
+}
+
+get_me_a_string(0)
+
+// you can also have cases combined
+def season(month: String) = month match {
+ case "March" | "April" | "May" => "It's spring"
+ case "June" | "July" | "August" => "It's summer"
+ case "September" | "October" | "November" => "It's autumn"
+ case "December" | "January" | "February" => "It's winter"
+}
+
+println(season("November"))
+
+// What happens if no case matches?
+
+println(season("foobar"))
+
+// fizz buzz
+def fizz_buzz(n: Int) : String = (n % 3, n % 5) match {
+ case (0, 0) => "fizz buzz"
+ case (0, _) => "fizz"
+ case (_, 0) => "buzz"
+ case _ => n.toString
+}
+
+for (n <- 0 to 20)
+ println(fizz_buzz(n))
+
+
+// User-defined Datatypes
+//========================
+
+abstract class Tree
+case class Node(elem: Int, left: Tree, right: Tree) extends Tree
+case class Leaf() extends Tree
+
+
+def insert(tr: Tree, n: Int): Tree = tr match {
+ case Leaf() => Node(n, Leaf(), Leaf())
+ case Node(m, left, right) =>
+ if (n == m) Node(m, left, right)
+ else if (n < m) Node(m, insert(left, n), right)
+ else Node(m, left, insert(right, n))
}
-val list: List[Double] = List(4, 16, 25, -9)
+
+val t1 = Node(4, Node(2, Leaf(), Leaf()), Node(7, Leaf(), Leaf()))
+insert(t1, 3)
+
+def depth(tr: Tree): Int = tr match {
+ case Leaf() => 0
+ case Node(_, left, right) => 1 + List(depth(left), depth(right)).max
+}
+
+
+def balance(tr: Tree): Int = tr match {
+ case Leaf() => 0
+ case Node(_, left, right) => depth(left) - depth(right)
+}
+
+balance(insert(t1, 3))
+
+// another example
+
+abstract class Person
+case class King() extends Person
+case class Peer(deg: String, terr: String, succ: Int) extends Person
+case class Knight(name: String) extends Person
+case class Peasant(name: String) extends Person
+case class Clown() extends Person
-val result = list.map(Math.sqrt)
-// => result: List[Double] = List(2.0, 4.0, 5.0, NaN)
+def title(p: Person): String = p match {
+ case King() => "His Majesty the King"
+ case Peer(deg, terr, _) => s"The ${deg} of ${terr}"
+ case Knight(name) => s"Sir ${name}"
+ case Peasant(name) => name
+}
-val result = list.collect(squareRoot)
-// => result: List[Double] = List(2.0, 4.0, 5.0)
+def superior(p1: Person, p2: Person): Boolean = (p1, p2) match {
+ case (King(), _) => true
+ case (Peer(_,_,_), Knight(_)) => true
+ case (Peer(_,_,_), Peasant(_)) => true
+ case (Peer(_,_,_), Clown()) => true
+ case (Knight(_), Peasant(_)) => true
+ case (Knight(_), Clown()) => true
+ case (Clown(), Peasant(_)) => true
+ case _ => false
+}
+
+val people = List(Knight("David"),
+ Peer("Duke", "Norfolk", 84),
+ Peasant("Christian"),
+ King(),
+ Clown())
+
+println(people.sortWith(superior(_, _)).mkString(", "))
+
// Higher-Order Functions
@@ -199,91 +298,35 @@
val lst = (1 to 10).toList
-def even(x: Int) : Boolean = x % 2 == 0
-def odd(x: Int) : Boolean = x % 2 == 1
+def even(x: Int): Boolean = x % 2 == 0
+def odd(x: Int): Boolean = x % 2 == 1
-lst.filter(x => even(x) && odd(x))
+lst.filter(x => even(x))
lst.filter(even(_))
-lst.filter(odd && even)
+lst.filter(even)
lst.find(_ > 8)
-// map applies a function to each element of a list
-
def square(x: Int): Int = x * x
-val lst = (1 to 10).toList
lst.map(square)
lst.map(square).filter(_ > 4)
lst.map(square).filter(_ > 4).map(square)
-// map works for most collection types, including sets
-Set(1, 3, 6).map(square).filter(_ > 4)
-
-
-val l = List((1, 3),(2, 4),(4, 1),(6, 2))
-
-l.map(square(_._1))
-
-
-// Why are functions as arguments useful?
-//
-// Consider the sum between a and b:
-
-def sumInts(a: Int, b: Int) : Int =
- if (a > b) 0 else a + sumInts(a + 1, b)
-
-
-sumInts(10, 16)
-
-// sum squares
-def square(n: Int) : Int = n * n
-
-def sumSquares(a: Int, b: Int) : Int =
- if (a > b) 0 else square(a) + sumSquares(a + 1, b)
-
-sumSquares(2, 6)
+// in my collatz.scala
+//(1 to bnd).map(i => (collatz(i), i)).maxBy(_._1)
-// sum factorials
-def fact(n: Int) : Int =
- if (n == 0) 1 else n * fact(n - 1)
-
-def sumFacts(a: Int, b: Int) : Int =
- if (a > b) 0 else fact(a) + sumFacts(a + 1, b)
-
-sumFacts(2, 6)
+// type of functions, for example f: Int => Int
-
-
-// You can see the pattern....can we simplify our work?
-// The type of functions from ints to ints: Int => Int
-
-def sum(f: Int => Int, a: Int, b: Int) : Int = {
- if (a > b) 0
- else f(a) + sum(f, a + 1, b)
+def my_map_int(lst: List[Int], f: Int => Int): List[Int] = lst match {
+ case Nil => Nil
+ case x::xs => f(x)::my_map_int(xs, f)
}
-
-def sumSquares(a: Int, b: Int) : Int = sum(square, a, b)
-def sumFacts(a: Int, b: Int) : Int = sum(fact, a, b)
-
-// What should we do for sumInts?
-
-def id(n: Int) : Int = n
-def sumInts(a: Int, b: Int) : Int = sum(id, a, b)
-
-sumInts(10, 12)
-
-
-// Anonymous Functions: You can also write:
-
-def sumCubes(a: Int, b: Int) : Int = sum(x => x * x * x, a, b)
-def sumSquares(a: Int, b: Int) : Int = sum(x => x * x, a, b)
-def sumInts(a: Int, b: Int) : Int = sum(x => x, a, b)
-
+my_map_int(lst, square)
// other function types
//
@@ -292,204 +335,56 @@
// ...
-// an aside: partial application
-
-def add(a: Int)(b: Int) : Int = a + b
-def add_abc(a: Int)(b: Int)(c: Int) : Int = a + b + c
-
-val add2 : Int => Int = add(2)
-add2(5)
-
-val add2_bc : Int => Int => Int = add_abc(2)
-val add2_9_c : Int => Int = add2_bc(9)
-
-add2_9_c(10)
-
-sum(add(2), 0, 2)
-sum(add(10), 0, 2)
-
-
-
-
-// some automatic timing in each evaluation
-package wrappers {
-
- object wrap {
-
- def timed[R](block: => R): R = {
- val t0 = System.nanoTime()
- val result = block
- println("Elapsed time: " + (System.nanoTime - t0) + "ns")
- result
- }
-
- def apply[A](a: => A): A = {
- timed(a)
- }
- }
+def sumOf(f: Int => Int, lst: List[Int]): Int = lst match {
+ case Nil => 0
+ case x::xs => f(x) + sumOf(f, xs)
}
-$intp.setExecutionWrapper("wrappers.wrap")
+def sum_squares(lst: List[Int]) = sumOf(square, lst)
+def sum_cubes(lst: List[Int]) = sumOf(x => x * x * x, lst)
-// Iteration
+sum_squares(lst)
+sum_cubes(lst)
+
+// lets try it factorial
+def fact(n: Int): Int = ...
-def fib(n: Int) : Int =
- if (n <= 1) 1 else fib(n - 1) + fib(n - 2)
+def sum_fact(lst: List[Int]) = sumOf(fact, lst)
+sum_fact(lst)
-fib(10)
-
+// Avoid being mutable
+//=====================
-Iterator.iterate((1,1)){ case (n: Int, m: Int) => (n + m, n) }.drop(9).next
-
+// a student showed me...
+import scala.collection.mutable.ListBuffer
-// Function Composition
-//======================
-
-// How can be Higher-Order Functions and Options be helpful?
-
-def add_footer(msg: String) : String = msg ++ " - Sent from iOS"
-
-def valid_msg(msg: String) : Boolean = msg.size <= 140
-
-def duplicate(s: String) : String = s ++ s
+def collatz_max(bnd: Long): (Long, Long) = {
+ val colNos = ListBuffer[(Long, Long)]()
+ for (i <- (1L to bnd).toList) colNos += ((collatz(i), i))
+ colNos.max
+}
-// they compose very nicely, e.g
-
-valid_msg(add_footer("Hello World"))
-valid_msg(duplicate(duplicate(add_footer("Helloooooooooooooooooo World"))))
-
-// but not all functions do
-// first_word: let's first do it the ugly Java way using null:
-
-def first_word(msg: String) : String = {
- val words = msg.split(" ")
- if (words(0) != "") words(0) else null
+def collatz_max(bnd: Long): (Long, Long) = {
+ (1L to bnd).map((i) => (collatz(i), i)).maxBy(_._1)
}
-duplicate(first_word("Hello World"))
-duplicate(first_word(""))
-
-def extended_duplicate(s: String) : String =
- if (s != null) s ++ s else null
-
-extended_duplicate(first_word(""))
-
-// but this is against the rules of the game: we do not want
-// to change duplicate, because first_word might return null
-
-
-// Avoid always null!
-def better_first_word(msg: String) : Option[String] = {
- val words = msg.split(" ")
- if (words(0) != "") Some(words(0)) else None
+//views -> lazy collection
+def collatz_max(bnd: Long): (Long, Long) = {
+ (1L to bnd).view.map((i) => (collatz(i), i)).maxBy(_._1)
}
-better_first_word("Hello World").map(duplicate)
-
-better_first_word("Hello World").map(duplicate)
-better_first_word("").map(duplicate).map(duplicate).map(valid_msg)
+// raises a GC exception
+(1 to 1000000000).filter(_ % 2 == 0).take(10).toList
+// ==> java.lang.OutOfMemoryError: GC overhead limit exceeded
-better_first_word("").map(duplicate)
-better_first_word("").map(duplicate).map(valid_msg)
-
-
+(1 to 1000000000).view.filter(_ % 2 == 0).take(10).toList
-// Problems with mutability and parallel computations
-//====================================================
-
-def count_intersection(A: Set[Int], B: Set[Int]) : Int = {
- var count = 0
- for (x <- A; if (B contains x)) count += 1
- count
-}
-
-val A = (1 to 1000).toSet
-val B = (1 to 1000 by 4).toSet
-
-count_intersection(A, B)
-
-// but do not try to add .par to the for-loop above,
-// otherwise you will be caught in race-condition hell.
-
-
-//propper parallel version
-def count_intersection2(A: Set[Int], B: Set[Int]) : Int =
- A.par.count(x => B contains x)
-
-count_intersection2(A, B)
-
-
-//for measuring time
-def time_needed[T](n: Int, code: => T) = {
- val start = System.nanoTime()
- for (i <- (0 to n)) code
- val end = System.nanoTime()
- (end - start) / 1.0e9
-}
-
-val A = (1 to 1000000).toSet
-val B = (1 to 1000000 by 4).toSet
-
-time_needed(10, count_intersection(A, B))
-time_needed(10, count_intersection2(A, B))
-
-
-
-
-
-
-// No returns in Scala
-//====================
-
-// You should not use "return" in Scala:
-//
-// A return expression, when evaluated, abandons the
-// current computation and returns to the caller of the
-// function in which return appears."
-
-def sq1(x: Int): Int = x * x
-def sumq(ls: List[Int]): Int =
- ls.map(x => x * x).sum
-
-
-
-
-def sq2(x: Int): Int = return x * x
-
-def sumq(ls: List[Int]): Int = {
- ls.map(sq1).sum[Int]
-}
-
-sumq(List(1, 2, 3, 4))
-
-
-
-def sumq(ls: List[Int]): Int = {
- val sqs : List[Int] = for (x <- ls) yield (return x * x)
- sqs.sum
-}
-
-sumq(List(1, 2, 3, 4))
-
-
-
-// Type abbreviations
-//====================
-
-// some syntactic convenience
-
-type Pos = (int, Int)
-type Board = List[List[Int]]
-
-
-
-
-// Sudoku in Scala
-//=================
+// Sudoku
+//========
// THE POINT OF THIS CODE IS NOT TO BE SUPER
// EFFICIENT AND FAST, just explaining exhaustive
@@ -514,19 +409,15 @@
val indexes = (0 to 8).toList
-def empty(game: String) = game.indexOf(EmptyValue)
-def isDone(game: String) = empty(game) == -1
-def emptyPosition(game: String) =
- (empty(game) % MaxValue, empty(game) / MaxValue)
-def get_row(game: String, y: Int) =
- indexes.map(col => game(y * MaxValue + col))
-def get_col(game: String, x: Int) =
- indexes.map(row => game(x + row * MaxValue))
+def empty(game: String) = game.indexOf(EmptyValue)
+def isDone(game: String) = empty(game) == -1
+def emptyPosition(game: String) = (empty(game) % MaxValue, empty(game) / MaxValue)
-get_row(game0, 3)
-get_col(game0, 0)
+
+def get_row(game: String, y: Int) = indexes.map(col => game(y * MaxValue + col))
+def get_col(game: String, x: Int) = indexes.map(row => game(x + row * MaxValue))
def get_box(game: String, pos: Pos): List[Char] = {
def base(p: Int): Int = (p / 3) * 3
@@ -536,34 +427,28 @@
(x0 until x0 + 3).toList.flatMap(x => ys.map(y => game(x + y * MaxValue)))
}
-get_box(game0, (0, 0))
-get_box(game0, (1, 1))
-get_box(game0, (2, 1))
-// this is not mutable!!
-def update(game: String, pos: Int, value: Char): String =
- game.updated(pos, value)
+//get_row(game0, 0)
+//get_row(game0, 1)
+//get_box(game0, (3,1))
+
+def update(game: String, pos: Int, value: Char): String = game.updated(pos, value)
def toAvoid(game: String, pos: Pos): List[Char] =
(get_col(game, pos._1) ++ get_row(game, pos._2) ++ get_box(game, pos))
-def candidates(game: String, pos: Pos): List[Char] =
- allValues.diff(toAvoid(game,pos))
+def candidates(game: String, pos: Pos): List[Char] = allValues diff toAvoid(game,pos)
//candidates(game0, (0,0))
-def pretty(game: String): String =
- "\n" + (game sliding (MaxValue, MaxValue) mkString "\n")
+def pretty(game: String): String = "\n" + (game sliding (MaxValue, MaxValue) mkString "\n")
def search(game: String): List[String] = {
if (isDone(game)) List(game)
- else {
- val cs = candidates(game, emptyPosition(game))
- cs.par.map(c => search(update(game, empty(game), c))).toList.flatten
- }
+ else
+ candidates(game, emptyPosition(game)).map(c => search(update(game, empty(game), c))).toList.flatten
}
-search(game0).map(pretty)
val game1 = """23.915...
|...2..54.
@@ -575,9 +460,8 @@
|.16..7...
|...329..1""".stripMargin.replaceAll("\\n", "")
-search(game1).map(pretty)
-// game that is in the hard(er) category
+// game that is in the hard category
val game2 = """8........
|..36.....
|.7..9.2..
@@ -600,8 +484,8 @@
|9724...5.""".stripMargin.replaceAll("\\n", "")
-search(game2).map(pretty)
-search(game3).map(pretty)
+search(game0).map(pretty)
+search(game1).map(pretty)
// for measuring time
def time_needed[T](i: Int, code: => T) = {
@@ -613,11 +497,10 @@
search(game2).map(pretty)
search(game3).distinct.length
-time_needed(1, search(game2))
-time_needed(1, search(game3))
+time_needed(3, search(game2))
+time_needed(3, search(game3))
-//===================
-// the end for today
+