diff -r 6fc7de0f23ba -r 86d1e2e6c211 slides/slides07.tex --- a/slides/slides07.tex Mon Nov 25 20:31:01 2013 +0000 +++ b/slides/slides07.tex Tue Nov 26 00:01:50 2013 +0000 @@ -726,366 +726,9 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Privacy, Anonymity et al.} - -Some terminology: - -\begin{itemize} -\item \alert{secrecy} is the mechanism used to limit the number of -principals with access to information (e.g., cryptography or access controls) - -\item \alert{confidentiality} is the obligation to protect the secrets of other people -or organisations (secrecy for the benefit of an organisation) - -\item \alert{anonymity} is the ability to leave no evidence of an activity (e.g., sharing a secret) - -\item \alert{privacy} is the ability or right to protect your personal secrets -(secrecy for the benefit of an individual) - -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{Privacy vs Anonymity} - -\begin{itemize} -\item everybody agrees that anonymity has its uses (e.g., voting, whistleblowers, peer-review) -\end{itemize}\bigskip\bigskip\pause - - -But privacy?\bigskip\bigskip - -``You have zero privacy anyway. Get over it.''\\ -\hfill{}Scott McNealy (CEO of Sun)\bigskip\\ - - -If you have nothing to hide, you have nothing to fear. 
- -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{Privacy} - -private data can often be used against me - -\begin{itemize} -\item if my location data becomes public, thieves will switch off their phones and help themselves in my home -\item if supermarkets can build a profile of what I buy, they can use it to their advantage (banks - mortgages) -\item my employer might not like my opinions\bigskip\pause - -\item on the other hand, Freedom-of-Information Act -\item medical data should be private, but medical research needs data -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{Privacy Problems} - -\begin{itemize} -\item Apple takes note of every dictation (sent over the Internet to Apple) -\item markets often only work if data is restricted (to build trust) -\item social networks can reveal data about you -\item have you tried the Collusion extension for Firefox? 
-\item I do use Dropbox and store cards\bigskip -\item next week: anonymising data -\end{itemize} - -\begin{textblock}{5}(12,9.8) -\includegraphics[scale=0.2]{pics/gattaca.jpg}\\ -\small Gattaca (1997) -\end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{Privacy} - -\begin{minipage}{1.05\textwidth} -\begin{itemize} -\item we \alert{do} want government data to be made public (free maps for example) -\item we \alert{do not} want medical data to become public (similarly tax data, school -records, job offers)\bigskip -\item personal information can potentially lead to fraud -(identity theft) -\end{itemize}\pause - -{\bf ``The reality'':} -\only<2>{\begin{itemize} -\item in June last year, London Health Programmes lost unencrypted details of more than 8 million people -(no names, but postcodes and details such as gender, age and ethnic origin) -\end{itemize}} -\only<3>{\begin{itemize} -\item also in June last year, Sony got hacked: over 1M users' personal information, including passwords, email addresses, home addresses, dates of birth, and all Sony opt-in data associated with their accounts. 
-\end{itemize}} -\end{minipage} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Privacy and Big Data} - -Selected sources of ``Big Data'':\smallskip{} - -\begin{itemize} -\item Facebook -\begin{itemize} -\item 40+ Billion photos (100 PB) -\item 6 Billion messages daily (5 - 10 TB) -\item 900 Million users -\end{itemize} -\item Common Crawl -\begin{itemize} -\item covers 3.8 Billion webpages (2012 dataset) -\item 50 TB of data -\end{itemize} -\item Google -\begin{itemize} -\item 20 PB daily (2008) -\end{itemize} -\item Twitter -\begin{itemize} -\item 7 Million users in the UK -\item a company called Datasift is allowed to mine all tweets since 2010 -\item they charge 10k per month for other companies to target advertisement -\end{itemize} -\end{itemize}\pause - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Cookies\ldots} - -``We have published a new cookie policy. It explains what cookies are -and how we use them on our site. To learn more about cookies and -their benefits, please view our cookie policy.\medskip - -If you'd like to disable cookies on this device, please view our information -pages on 'How to manage cookies'. Please be aware that parts of the -site will not function correctly if you disable cookies. 
\medskip - -By closing this -message, you consent to our use of cookies on this device in accordance -with our cookie policy unless you have disabled them.'' - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Scare Tactics} - -The actual policy reads:\bigskip - -``As we explain in our Cookie Policy, cookies help you to get the most -out of our websites.\medskip - -If you do disable our cookies you may find that certain sections of our -website do not work. For example, you may have difficulties logging in -or viewing articles.'' - - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Netflix Prize} - -Anonymity is \alert{necessary} for privacy, but \alert{not} enough!\bigskip - -\begin{itemize} -\item Netflix offered in 2006 (and every year until 2010) a \$1 million prize for improving their movie rating algorithm -\item dataset contained 10\% of all Netflix users (approx.~500K) -\item names were removed, but numerical ratings as well as times of rating were included -\item some information was \alert{perturbed} (i.e., slightly modified) -\end{itemize} - -\hfill{\bf\alert{All OK?}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Re-identification Attack} - -Two researchers analysed the data: - -\begin{itemize} -\item with 8 ratings (2 of them can be wrong) and corresponding dates that can have a 14-day margin of error, 99\% of the -records can be identified -\item for 68\% only two ratings and dates are sufficient (for movie ratings outside the top 500)\bigskip\pause -\item they took 50 samples from IMDb (where people can reveal their identity) -\item 2 
of them uniquely identified entries in the Netflix database (either by movie rating or by dates) -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{} - -\begin{itemize} -\item Birth date, postcode and gender (unique for\\ 87\% of the US population) -\item Preferences in movies (99\% of 500K for 8 ratings) -\end{itemize}\bigskip - -Therefore best practices or even law (HIPAA, EU): - -\begin{itemize} -\item only year dates (age group for 90 years or over), -\item no postcodes (sector data is OK, similarly in the US)\\ -\textcolor{gray}{no names, addresses, account numbers, licence plates} -\item disclosure information needs to be retained for 5 years -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}<2>[c] -\frametitle{How to Safely Disclose Information?} - -\only<1>{ -\begin{itemize} -\item Assume you make a survey of 100 randomly chosen people. -\item Say 99\% of the surveyed people in the 10--40 age group have seen the -Gangnam video on YouTube.\bigskip - -\item What can you infer about the rest of the population? -\end{itemize}} -\only<2>{ -\begin{itemize} -\item Is it possible to re-identify data later, if more data is released? \bigskip\bigskip\pause - -\item Not even releasing only aggregate information prevents re-identification attacks. 
-(GWAS was a public database of gene-frequency studies linked to diseases; -you only needed partial DNA information in order -to identify whether an individual was part of the study --- DB closed in 2008) -\end{itemize}} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Differential Privacy} - -\begin{center} -User\;\;\;\; -\begin{tabular}{c} -tell me \bl{$f(x)$} $\Rightarrow$\\ -$\Leftarrow$ \bl{$f(x) + \text{noise}$} -\end{tabular} -\;\;\;\;\begin{tabular}{@{}c} -Database\\ -\bl{$x_1, \ldots, x_n$} -\end{tabular} -\end{center} - - -\begin{itemize} -\item \bl{$f(x)$} can be released if \bl{$f$} is insensitive to -individual entries \bl{$x_1, \ldots, x_n$}\\ -\item Intuition: whatever is learned from the dataset would be learned regardless of whether -\bl{$x_i$} participates\bigskip\pause - -\item Noise is needed in order to prevent queries:\\ Christian's salary $=$ -\begin{center} -\bl{\large$\Sigma$} all staff $-$ \bl{\large$\Sigma$} all staff $\backslash$ Christian -\end{center} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{Adding Noise} - -Adding noise is not as trivial as one would wish: - -\begin{itemize} -\item If I ask how many of three have seen the Gangnam video and get a result -as follows - -\begin{center} -\begin{tabular}{l|c} -Alice & yes\\ -Bob & no\\ -Charlie & yes\\ -\end{tabular} -\end{center} - -then I have to add a noise of \bl{$1$}. 
So answers would be in the -range of \bl{$1$} to \bl{$3$} - -\bigskip -\item But if I ask five questions about the whole dataset (has seen Gangnam video, is male, below 30, \ldots), -then one individual can change the dataset by \bl{$5$} -\end{itemize} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{@{}c@{}}Take Home Point\end{tabular}} - -According to Ross Anderson: \bigskip -\begin{itemize} -\item Privacy in a big hospital is just about doable.\medskip -\item How do you enforce privacy in something as big as Google -or as complex as Facebook? Nobody knows.\bigskip - -Similarly for big databases imposed by governments -\end{itemize} - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{document}