slides/slides07.tex
changeset 138 86d1e2e6c211
parent 136 058504a45c34
equal deleted inserted replaced
137:6fc7de0f23ba 138:86d1e2e6c211
   724 \end{frame}}
   724 \end{frame}}
   725 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   725 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   726 
   726 
   727 
   727 
   728 
   728 
   729 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   729 
   730 \mode<presentation>{
   730 
   731 \begin{frame}[c]
   731 
   732 \frametitle{Privacy, Anonymity et al}
       
   733 
       
   734 Some terminology:
       
   735 
       
   736 \begin{itemize}
       
   737 \item \alert{secrecy} is the mechanism used to limit the number of 
       
   738 principals with access to information (eg, cryptography or access controls)
       
   739 
       
   740 \item \alert{confidentiality} is the obligation to protect the secrets of other people 
       
   741 or organizations (secrecy for the benefit of an organisation)
       
   742 
       
   743 \item \alert{anonymity} is the ability to leave no evidence of an activity (eg, sharing a secret)
       
   744 
       
   745 \item \alert{privacy} is the ability or right to protect your personal secrets 
       
   746 (secrecy for the benefit of an individual)
       
   747 
       
   748 \end{itemize}
       
   749 
       
   750 \end{frame}}
       
   751 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   752 
       
   753 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   754 \mode<presentation>{
       
   755 \begin{frame}[t]
       
   756 \frametitle{Privacy vs Anonymity}
       
   757 
       
   758 \begin{itemize}
       
   759 \item everybody agrees that anonymity has its uses (e.g., voting, whistleblowers, peer-review)
       
   760 \end{itemize}\bigskip\bigskip\pause
       
   761 
       
   762 
       
   763 But privacy?\bigskip\bigskip
       
   764 
       
   765 ``You have zero privacy anyway. Get over it.''\\
       
   766 \hfill{}Scott McNealy (CEO of Sun)\bigskip\\
       
   767 
       
   768 
       
   769 If you have nothing to hide, you have nothing to fear.
       
   770 
       
   771 \end{frame}}
       
   772 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   773 
       
   774 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   775 \mode<presentation>{
       
   776 \begin{frame}[t]
       
   777 \frametitle{Privacy}
       
   778 
       
   779 private data can be often used against me
       
   780 
       
   781 \begin{itemize}
       
   782 \item if my location data becomes public, thieves will switch off their phones and help themselves in my home
       
   783 \item if supermarkets can build a profile of what I buy, they can use it to their advantage (banks - mortgages)
       
   784 \item my employer might not like my opinions\bigskip\pause
       
   785 
       
   786 \item on the other hand, Freedom-of-Information Act 
       
   787 \item medical data should be private, but medical research needs data
       
   788 \end{itemize}
       
   789 
       
   790 \end{frame}}
       
   791 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   792 
       
   793 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   794 \mode<presentation>{
       
   795 \begin{frame}[t]
       
   796 \frametitle{Privacy Problems}
       
   797 
       
   798 \begin{itemize}
       
   799 \item Apple takes note of every dictation (sent over the Internet to Apple)
       
   800 \item markets often only work, if data is restricted (to build trust)
       
   801 \item Social networks can reveal data about you 
       
   802 \item have you tried the Collusion extension for Firefox?
       
   803 \item I do use Dropbox and store cards\bigskip
       
   804 \item next week: anonymising data
       
   805 \end{itemize}
       
   806 
       
   807 \begin{textblock}{5}(12,9.8)
       
   808 \includegraphics[scale=0.2]{pics/gattaca.jpg}\\
       
   809 \small Gattaca (1997)
       
   810 \end{textblock}
       
   811 
       
   812 \end{frame}}
       
   813 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   814 
       
   815 
       
   816 
       
   817 
       
   818 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   819 \mode<presentation>{
       
   820 \begin{frame}[t]
       
   821 \frametitle{Privacy}
       
   822 
       
   823 \begin{minipage}{1.05\textwidth}
       
   824 \begin{itemize}
       
   825 \item we \alert{do} want that government data is made public (free maps for example)
       
   826 \item we \alert{do not} want that medical data becomes public (similarly tax data, school 
       
   827 records, job offers)\bigskip
       
   828 \item personal information can potentially lead to fraud 
       
   829 (identity theft)
       
   830 \end{itemize}\pause
       
   831 
       
   832 {\bf ``The reality'':}
       
   833 \only<2>{\begin{itemize}
       
   834 \item London Health Programmes lost in June last year unencrypted details of more than 8 million people
       
   835 (no names, but postcodes and details such as gender, age and ethnic origin)
       
   836 \end{itemize}}
       
   837 \only<3>{\begin{itemize}
       
   838 \item also in June last year, Sony got hacked: over 1M users' personal information, including passwords, email addresses, home addresses, dates of birth, and all Sony opt-in data associated with their accounts.
       
   839 \end{itemize}}
       
   840 \end{minipage}
       
   841 
       
   842 \end{frame}}
       
   843 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   844 
       
   845    
       
   846 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   847 \mode<presentation>{
       
   848 \begin{frame}[c]
       
   849 \frametitle{Privacy and Big Data}
       
   850 
       
   851 Selected sources of ``Big Data'':\smallskip{}
       
   852 
       
   853 \begin{itemize}
       
   854 \item Facebook 
       
   855 \begin{itemize}
       
   856 \item 40+ Billion photos (100 PB)
       
   857 \item 6 Billion messages daily (5 - 10 TB)
       
   858 \item 900 Million users  
       
   859 \end{itemize}
       
   860 \item Common Crawl
       
   861 \begin{itemize}
       
   862 \item covers 3.8 Billion webpages (2012 dataset)
       
   863 \item 50 TB of data
       
   864 \end{itemize}
       
   865 \item Google
       
   866 \begin{itemize}
       
   867 \item 20 PB daily (2008)
       
   868 \end{itemize}
       
   869 \item Twitter
       
   870 \begin{itemize}
       
   871 \item 7 Million users in the UK
       
   872 \item a company called Datasift is allowed to mine all tweets since 2010
       
   873 \item they charge 10k per month for other companies to target advertisement
       
   874 \end{itemize}
       
   875 \end{itemize}\pause
       
   876 
       
   877 
       
   878 \end{frame}}
       
   879 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   880 
       
   881 
       
   882 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   883 \mode<presentation>{
       
   884 \begin{frame}[c]
       
   885 \frametitle{Cookies\ldots}
       
   886 
       
   887 ``We have published a new cookie policy. It explains what cookies are 
       
   888 and how we use them on our site. To learn more about cookies and 
       
   889 their benefits, please view our cookie policy.\medskip
       
   890 
       
   891 If you'd like to disable cookies on this device, please view our information 
       
   892 pages on 'How to manage cookies'. Please be aware that parts of the 
       
   893 site will not function correctly if you disable cookies. \medskip
       
   894 
       
   895 By closing this 
       
   896 message, you consent to our use of cookies on this device in accordance 
       
   897 with our cookie policy unless you have disabled them.''
       
   898 
       
   899 
       
   900 \end{frame}}
       
   901 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   902 
       
   903 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   904 \mode<presentation>{
       
   905 \begin{frame}[c]
       
   906 \frametitle{Scare Tactics}
       
   907 
       
   908 The actual policy reads:\bigskip
       
   909 
       
   910 ``As we explain in our Cookie Policy, cookies help you to get the most 
       
   911 out of our websites.\medskip
       
   912 
       
   913 If you do disable our cookies you may find that certain sections of our 
       
   914 website do not work. For example, you may have difficulties logging in 
       
   915 or viewing articles.''
       
   916 
       
   917 
       
   918 
       
   919 
       
   920 \end{frame}}
       
   921 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   922 
       
   923 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   924 \mode<presentation>{
       
   925 \begin{frame}[c]
       
   926 \frametitle{Netflix Prize}
       
   927 
       
   928 Anonymity is \alert{necessary} for privacy, but \alert{not} enough!\bigskip
       
   929 
       
   930 \begin{itemize}
       
   931 \item Netflix offered in 2006 (and every year until 2010) a 1 Million \$ prize for improving their movie rating algorithm
       
   932 \item dataset contained 10\% of all Netflix users (appr.~500K)
       
   933 \item names were removed, but included numerical ratings as well as times of rating
       
   934 \item some information was \alert{perturbed} (i.e., slightly modified)
       
   935 \end{itemize}
       
   936 
       
   937 \hfill{\bf\alert{All OK?}}
       
   938 
       
   939 \end{frame}}
       
   940 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   941 
       
   942 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   943 \mode<presentation>{
       
   944 \begin{frame}[c]
       
   945 \frametitle{Re-identification Attack}
       
   946 
       
   947 Two researchers analysed the data: 
       
   948 
       
   949 \begin{itemize}
       
   950 \item with 8 ratings (2 of them can be wrong) and corresponding dates that can have a 14-day error margin, 98\% of the
       
   951 records can be identified
       
   952 \item for 68\% only two ratings and dates are sufficient (for movie ratings outside the top 500)\bigskip\pause
       
   953 \item they took 50 samples from IMDb (where people can reveal their identity)
       
   954 \item 2 of them uniquely identified entries in the Netflix database (either by movie rating or by dates)
       
   955 \end{itemize}
       
   956 
       
   957 \end{frame}}
       
   958 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   959 
       
   960 
       
   961 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   962 \mode<presentation>{
       
   963 \begin{frame}[c]
       
   964 \frametitle{}
       
   965 
       
   966 \begin{itemize}
       
   967 \item Birth data, postcode and gender (unique for\\ 87\% of the US population)
       
   968 \item Preferences in movies (99\% of 500K for 8 ratings)
       
   969 \end{itemize}\bigskip
       
   970 
       
   971 Therefore best practices / or even law (HIPAA, EU): 
       
   972 
       
   973 \begin{itemize}
       
   974 \item only year dates (age group for 90 years or over), 
       
   975 \item no postcodes (sector data is OK, similarly in the US)\\
       
   976 \textcolor{gray}{no names, addresses, account numbers, licence plates}
       
   977 \item disclosure information needs to be retained for 5 years
       
   978 \end{itemize}
       
   979 
       
   980 \end{frame}}
       
   981 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   982 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   983 \mode<presentation>{
       
   984 \begin{frame}<2>[c]
       
   985 \frametitle{How to Safely Disclose Information?}
       
   986 
       
   987 \only<1>{
       
   988 \begin{itemize}
       
   989 \item Assume you make a survey of 100 randomly chosen people.
       
   990 \item Say 99\% of the surveyed people in the 10 - 40 age group have seen the
       
   991 Gangnam video on youtube.\bigskip
       
   992 
       
   993 \item What can you infer about the rest of the population? 
       
   994 \end{itemize}}
       
   995 \only<2>{
       
   996 \begin{itemize}
       
   997 \item Is it possible to re-identify data later, if more data is released?\bigskip\bigskip\pause
       
   998 
       
   999 \item Not even releasing only  aggregate information prevents re-identification attacks.
       
  1000 (GWAS was a public database of gene-frequency studies linked to diseases;
       
  1001 you only needed partial DNA information  in order
       
  1002 to identify whether an individual was part of the study --- DB closed in 2008) 
       
  1003 \end{itemize}}
       
  1004 
       
  1005 \end{frame}}
       
  1006 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
  1007      
       
  1008 
       
  1009 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
  1010 \mode<presentation>{
       
  1011 \begin{frame}[c]
       
  1012 \frametitle{Differential Privacy}
       
  1013 
       
  1014 \begin{center}
       
  1015 User\;\;\;\;    
       
  1016 \begin{tabular}{c}
       
  1017 tell me \bl{$f(x)$} $\Rightarrow$\\
       
  1018 $\Leftarrow$ \bl{$f(x) + \text{noise}$}
       
  1019 \end{tabular}
       
  1020 \;\;\;\;\begin{tabular}{@{}c}
       
  1021 Database\\
       
  1022 \bl{$x_1, \ldots, x_n$}
       
  1023 \end{tabular}
       
  1024 \end{center}
       
  1025 
       
  1026 
       
  1027 \begin{itemize}
       
  1028 \item \bl{$f(x)$} can be released, if \bl{$f$} is insensitive to
       
  1029 individual entries  \bl{$x_1, \ldots, x_n$}\\
       
  1030 \item Intuition: whatever is learned from the dataset would be learned regardless of whether
       
  1031 \bl{$x_i$} participates\bigskip\pause 
       
  1032 
       
  1033 \item Noise is needed in order to prevent queries:\\ Christian's salary $=$ 
       
  1034 \begin{center}
       
  1035 \bl{\large$\Sigma$} all staff $-$  \bl{\large$\Sigma$} all staff $\backslash$ Christian
       
  1036 \end{center} 
       
  1037 \end{itemize}
       
  1038 
       
  1039 \end{frame}}
       
  1040 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
  1041 
       
  1042 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
  1043 \mode<presentation>{
       
  1044 \begin{frame}[c]
       
  1045 \frametitle{Adding Noise}
       
  1046 
       
  1047 Adding noise is not as trivial as one would wish:
       
  1048 
       
  1049 \begin{itemize}
       
  1050 \item If I ask how many of the three have seen the Gangnam video and get a result
       
  1051 as follows 
       
  1052 
       
  1053 \begin{center}
       
  1054 \begin{tabular}{l|c}
       
  1055 Alice & yes\\
       
  1056 Bob & no\\
       
  1057 Charlie & yes\\
       
  1058 \end{tabular}
       
  1059 \end{center}
       
  1060 
       
  1061 then I have to add a noise of \bl{$1$}. So answers would be in the
       
  1062 range of \bl{$1$} to \bl{$3$}
       
  1063 
       
  1064 \bigskip
       
  1065 \item But if I ask five questions for all the dataset (has seen Gangnam video, is male, below 30, \ldots),
       
  1066 then one individual can change the dataset by \bl{$5$}
       
  1067 \end{itemize}
       
  1068 
       
  1069 \end{frame}}
       
  1070 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
  1071      
       
  1072 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
  1073 \mode<presentation>{
       
  1074 \begin{frame}[c]
       
  1075 \frametitle{\begin{tabular}{@{}c@{}}Take Home Point\end{tabular}}
       
  1076 
       
  1077 According to Ross Anderson: \bigskip
       
  1078 \begin{itemize}
       
  1079 \item Privacy in a big hospital is just about doable.\medskip
       
  1080 \item How do you enforce privacy  in something as big as Google
       
  1081 or as complex as Facebook? Nobody knows.\bigskip
       
  1082 
       
  1083 Similarly, big databases imposed by government
       
  1084 \end{itemize}
       
  1085 
       
  1086 
       
  1087 \end{frame}}
       
  1088 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
  1089      
   732      
  1090 \end{document}
   733 \end{document}
  1091 
   734 
  1092 %%% Local Variables:  
   735 %%% Local Variables:  
  1093 %%% mode: latex
   736 %%% mode: latex