slides/slides09.tex
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 23 Sep 2013 22:23:55 +0100
changeset 98 3d585e603927
parent 90 d1d07f05325a
child 145 279fa5a06231
permissions -rw-r--r--
updated

\documentclass[dvipsnames,14pt,t]{beamer}
\usepackage{proof}
\usepackage{beamerthemeplainculight}
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{mathpartir}
\usepackage{isabelle}
\usepackage{isabellesym}
\usepackage[absolute,overlay]{textpos}
\usepackage{ifthen}
\usepackage{tikz}
\usepackage{courier}
\usepackage{listings}
\usetikzlibrary{arrows}
\usetikzlibrary{positioning}
\usetikzlibrary{calc}
\usepackage{graphicx} 
\usetikzlibrary{shapes}
\usetikzlibrary{shadows}
\usetikzlibrary{plotmarks}


\isabellestyle{rm}
\renewcommand{\isastyle}{\rm}%
\renewcommand{\isastyleminor}{\rm}%
\renewcommand{\isastylescript}{\footnotesize\rm\slshape}%
\renewcommand{\isatagproof}{}
\renewcommand{\endisatagproof}{}
\renewcommand{\isamarkupcmt}[1]{#1}

% Isabelle characters
\renewcommand{\isacharunderscore}{\_}
\renewcommand{\isacharbar}{\isamath{\mid}}
\renewcommand{\isasymiota}{}
\renewcommand{\isacharbraceleft}{\{}
\renewcommand{\isacharbraceright}{\}}
\renewcommand{\isacharless}{$\langle$}
\renewcommand{\isachargreater}{$\rangle$}
\renewcommand{\isasymsharp}{\isamath{\#}}
\renewcommand{\isasymdots}{\isamath{...}}
\renewcommand{\isasymbullet}{\act}



\definecolor{javared}{rgb}{0.6,0,0} % for strings
\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc

\lstset{language=Java,
	basicstyle=\ttfamily,
	keywordstyle=\color{javapurple}\bfseries,
	stringstyle=\color{javagreen},
	commentstyle=\color{javagreen},
	morecomment=[s][\color{javadocblue}]{/**}{*/},
	numbers=left,
	numberstyle=\tiny\color{black},
	stepnumber=1,
	numbersep=10pt,
	tabsize=2,
	showspaces=false,
	showstringspaces=false}

\lstdefinelanguage{scala}{
  morekeywords={abstract,case,catch,class,def,%
    do,else,extends,false,final,finally,%
    for,if,implicit,import,match,mixin,%
    new,null,object,override,package,%
    private,protected,requires,return,sealed,%
    super,this,throw,trait,true,try,%
    type,val,var,while,with,yield},
  otherkeywords={=>,<-,<\%,<:,>:,\#,@},
  sensitive=true,
  morecomment=[l]{//},
  morecomment=[n]{/*}{*/},
  morestring=[b]",
  morestring=[b]',
  morestring=[b]"""
}

\lstset{language=Scala,
	basicstyle=\ttfamily,
	keywordstyle=\color{javapurple}\bfseries,
	stringstyle=\color{javagreen},
	commentstyle=\color{javagreen},
	morecomment=[s][\color{javadocblue}]{/**}{*/},
	numbers=left,
	numberstyle=\tiny\color{black},
	stepnumber=1,
	numbersep=10pt,
	tabsize=2,
	showspaces=false,
	showstringspaces=false}

% beamer stuff 
\renewcommand{\slidecaption}{APP 09, King's College London, 27 November 2012}
\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions
\newcommand{\bl}[1]{\textcolor{blue}{#1}}

\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}<1>[t]
\frametitle{%
  \begin{tabular}{@ {}c@ {}}
  \\
  \LARGE Access Control and \\[-3mm] 
  \LARGE Privacy Policies (9)\\[-6mm] 
  \end{tabular}}\bigskip\bigskip\bigskip

  %\begin{center}
  %\includegraphics[scale=1.3]{pics/barrier.jpg}
  %\end{center}

\normalsize
  \begin{center}
  \begin{tabular}{ll}
  Email:  & christian.urban at kcl.ac.uk\\
  Of$\!$fice: & S1.27 (1st floor Strand Building)\\
  Slides: & KEATS (also homework is there)\\
  \end{tabular}
  \end{center}

\end{frame}}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Last Week}

Recall, the Needham-Schroeder (1978) protocol is vulnerable to replay attacks.

\begin{center}
\begin{tabular}{@{}r@ {\hspace{1mm}}l@{}}
\bl{$A \rightarrow S :$} & \bl{$A, B, N_A$}\\
\bl{$S \rightarrow A :$} & \bl{$\{N_A, B, K_{AB},\{K_{AB}, A\}_{K_{BS}} \}_{K_{AS}}$}\\
\bl{$A \rightarrow B :$} & \bl{$\{K_{AB}, A\}_{K_{BS}} $}\\
\bl{$B \rightarrow A :$} & \bl{$\{N_B\}_{K_{AB}}$}\\
\bl{$A \rightarrow B :$} & \bl{$\{N_B-1\}_{K_{AB}}$}\\
\end{tabular}
\end{center}\pause

Fix: Replace messages 2 and 3 to include a timestamp:\bigskip

\begin{minipage}{1.1\textwidth}
\begin{center}
\begin{tabular}{@{\hspace{-2mm}}r@ {\hspace{1mm}}l@{}}
\bl{$S \rightarrow A :$} & \bl{$\{B, K_{\!AB}, T_S, \!\{K_{\!AB}, A, T_S\}_{K_{BS}} \}_{K_{AS}}$}\\
\bl{$A \rightarrow B :$} & \bl{$\{K_{AB}, A, T_S\}_{K_{BS}} $}\\
\end{tabular}
\end{center}
\end{minipage}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{Denning-Sacco Fix}

Denning-Sacco (1981) suggested adding the timestamp, but omitting the handshake:\bigskip

\begin{minipage}{1.1\textwidth}
\begin{center}
\begin{tabular}{@{\hspace{-2mm}}r@ {\hspace{1mm}}l@{}}
\bl{$A \rightarrow S :$} & \bl{$A, B$}\\
\bl{$S \rightarrow A :$} & \bl{$\{B, K_{\!AB}, T_S, \!\{K_{\!AB}, A, T_S\}_{K_{BS}} \}_{K_{AS}}$}\\
\bl{$A \rightarrow B :$} & \bl{$\{K_{AB}, A, T_S\}_{K_{BS}} $}\\
\textcolor{lightgray}{$B \rightarrow A :$} & \textcolor{lightgray}{$\{N_B\}_{K_{AB}}$}\\
\textcolor{lightgray}{$A \rightarrow B :$} & \textcolor{lightgray}{$\{N_B-1\}_{K_{AB}}$}\\
\end{tabular}
\end{center}
\end{minipage}\bigskip

They argue that \bl{$A$} and \bl{$B$} can check that the messages are not replays of earlier 
runs, by checking the time difference to when the protocol was last used.
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\begin{tabular}{@{}c@{}}Denning-Sacco-Lowe Fix of Fix\end{tabular}}

Lowe (1997) disagreed and said the handshake should be kept, 
otherwise:\bigskip 

\begin{minipage}{1.1\textwidth}
\begin{center}
\begin{tabular}{@{\hspace{-7mm}}r@ {\hspace{1mm}}l@{}}
\bl{$A \rightarrow S :$} & \bl{$A, B$}\\
\bl{$S \rightarrow A :$} & \bl{$\{B, K_{\!AB}, T_S, \!\{K_{\!AB}, A, T_S\}_{K_{BS}} \}_{K_{AS}}$}\\
\bl{$A \rightarrow B :$} & \bl{$\{K_{AB}, A, T_S\}_{K_{BS}} $}\\
\bl{$I(A) \rightarrow B :$} & \bl{$\{K_{AB}, A, T_S\}_{K_{BS}} $}\hspace{5mm}\textcolor{black}{replay}\\
\end{tabular}
\end{center}
\end{minipage}\bigskip

When is this a problem?\pause\medskip

Assume \bl{$B$} is a bank and the message is ``Draw \pounds{1000} from \bl{$A$}'s
account and transfer it to \bl{$I$}.''
\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{Privacy}

\begin{minipage}{1.05\textwidth}
\begin{itemize}
\item we \alert{do} want that government data is made public (free maps for example)
\item we \alert{do not} want that medical data becomes public (similarly tax data, school 
records, job offers)\bigskip
\item personal information can potentially lead to fraud 
(identity theft)
\end{itemize}\pause

{\bf ``The reality'':}
\only<2>{\begin{itemize}
\item London Health Programmes lost in June unencrypted details of more than 8 million people
(no names, but postcodes and details such as gender, age and ethnic origin)
\end{itemize}}
\only<3>{\begin{itemize}
\item also in June, Sony got hacked: over 1M users' personal information, including passwords, email addresses, home addresses, dates of birth, and all Sony opt-in data associated with their accounts.
\end{itemize}}
\end{minipage}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

   
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Privacy and Big Data}

Selected sources of ``Big Data'':\smallskip{}

\begin{itemize}
\item Facebook 
\begin{itemize}
\item 40+ Billion photos (100 PB)
\item 6 Billion messages daily (5--10 TB)
\item 900 Million users  
\end{itemize}
\item Common Crawl
\begin{itemize}
\item covers 3.8 Billion webpages (2012 dataset)
\item 50 TB of data
\end{itemize}
\item Google
\begin{itemize}
\item 20 PB daily (2008)
\end{itemize}
\item Twitter
\begin{itemize}
\item 7 Million users in the UK
\item a company called Datasift is allowed to mine all tweets since 2010
\item they charge 10k per month for other companies to target advertisement
\end{itemize}
\end{itemize}\pause


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Cookies\ldots}

``We have published a new cookie policy. It explains what cookies are 
and how we use them on our site. To learn more about cookies and 
their benefits, please view our cookie policy.\medskip

If you'd like to disable cookies on this device, please view our information 
pages on `How to manage cookies'. Please be aware that parts of the 
site will not function correctly if you disable cookies. \medskip

By closing this 
message, you consent to our use of cookies on this device in accordance 
with our cookie policy unless you have disabled them.''


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Scare Tactics}

The actual policy reads:\bigskip

``As we explain in our Cookie Policy, cookies help you to get the most 
out of our websites.\medskip

If you do disable our cookies you may find that certain sections of our 
website do not work. For example, you may have difficulties logging in 
or viewing articles.''




\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Netflix Prize}

Anonymity is \alert{necessary} for privacy, but \alert{not} enough!\bigskip

\begin{itemize}
\item Netflix offered in 2006 (and every year until 2010) a \$1 million prize for improving their movie rating algorithm
\item dataset contained 10\% of all Netflix users (approx.~500K)
\item names were removed, but included numerical ratings as well as times of rating
\item some information was \alert{perturbed} (i.e., slightly modified)
\end{itemize}

\hfill{\bf\alert{All OK?}}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Re-identification Attack}

Two researchers analysed the data: 

\begin{itemize}
\item with 8 ratings (2 of them can be wrong) and corresponding dates that can have a 14-day margin of error, 99\% of the
records can be identified
\item for 68\% only two ratings and dates are sufficient (for movie ratings outside the top 500)\bigskip\pause
\item they took 50 samples from IMDb (where people can reveal their identity)
\item 2 of them uniquely identified entries in the Netflix database (either by movie rating or by dates)
\end{itemize}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{}

\begin{itemize}
\item Birth date, postcode and gender (unique for\\ 87\% of the US population)
\item Preferences in movies (99\% of 500K for 8 ratings)
\end{itemize}\bigskip

Therefore best practices / or even law (HIPAA, EU): 

\begin{itemize}
\item only year dates (age group for 90 years or over), 
\item no postcodes (sector data is OK, similarly in the US)\\
\textcolor{gray}{no names, addresses, account numbers, licence plates}
\item disclosure information needs to be retained for 5 years
\end{itemize}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{How to Safely Disclose Information?}

\only<1>{
\begin{itemize}
\item Assume you make a survey of 100 randomly chosen people.
\item Say 99\% of the surveyed people in the 10--40 age group have seen the
Gangnam video on YouTube.\bigskip

\item What can you infer about the rest of the population? 
\end{itemize}}
\only<2>{
\begin{itemize}
\item Is it possible to re-identify data later, if more data is released? \bigskip\bigskip\pause

\item Not even releasing only  aggregate information prevents re-identification attacks.
(GWAS was a public database of gene-frequency studies linked to diseases;
you only needed partial DNA information  in order
to identify whether an individual was part of the study --- DB closed in 2008) 
\end{itemize}}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Differential Privacy}

\begin{center}
User\;\;\;\;    
\begin{tabular}{c}
tell me \bl{$f(x)$} $\Rightarrow$\\
$\Leftarrow$ \bl{$f(x) + \text{noise}$}
\end{tabular}
\;\;\;\;\begin{tabular}{@{}c}
Database\\
\bl{$x_1, \ldots, x_n$}
\end{tabular}
\end{center}


\begin{itemize}
\item \bl{$f(x)$} can be released, if \bl{$f$} is insensitive to
individual entries  \bl{$x_1, \ldots, x_n$}\\
\item Intuition: whatever is learned from the dataset would be learned regardless of whether
\bl{$x_i$} participates\bigskip\pause 

\item Noise is needed in order to prevent queries:\\ Christian's salary $=$ 
\begin{center}
\bl{\large$\Sigma$} all staff $-$  \bl{\large$\Sigma$} all staff $\backslash$ Christian
\end{center} 
\end{itemize}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{Adding Noise}

Adding noise is not as trivial as one would wish:

\begin{itemize}
\item If I ask how many of three have seen the Gangnam video and get a result
as follows 

\begin{center}
\begin{tabular}{l|c}
Alice & yes\\
Bob & no\\
Charlie & yes\\
\end{tabular}
\end{center}

then I have to add a noise of \bl{$1$}. So answers would be in the
range of \bl{$1$} to \bl{$3$}

\bigskip
\item But if I ask five questions about the whole dataset (has seen Gangnam video, is male, below 30, \ldots),
then one individual can change the dataset by \bl{$5$}
\end{itemize}

\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\begin{tabular}{@{}c@{}}Tor, Anonymous Webbrowsing\end{tabular}}

\begin{itemize}
\item initially developed by US Navy Labs, but then opened up to the world 
\item network of proxy nodes
\item a Tor client establishes a ``random'' path to the destination server (you cannot trace back where the information came from)\bigskip\pause
\end{itemize}

\only<2>{
\begin{itemize}
\item malicious exit node attack: someone set up 5 Tor exit nodes and monitored the traffic:
\begin{itemize}
\item a number of logons and passwords used by embassies (Uzbekistan `s1e7u0l7c', while
Tunisia `Tunisia' and India `1234')
\end{itemize}
\end{itemize}}
\only<3>{
\begin{itemize}
\item bad apple attack: if you have one insecure application, your IP can be tracked through Tor
\begin{itemize}
\item background: 40\% of traffic on Tor is generated by BitTorrent
\end{itemize}
\end{itemize}}


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[t]
\frametitle{\begin{tabular}{@{}c@{}}Skype Secure Communication\end{tabular}}

\begin{itemize}
\item Skype used to be known for secure online communication (encryption cannot be disabled), 
but \ldots\medskip

\item it is impossible to verify whether crypto algorithms are correctly used, or whether  there are backdoors.\bigskip
 
\item recently someone found out that you can reset the password of somebody else's
account, knowing only their email address (Skype had to suspend the password reset feature temporarily)
\end{itemize}


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{@{}c@{}}Take Home Point\end{tabular}}

According to Ross Anderson: \bigskip
\begin{itemize}
\item Privacy in a big hospital is just about doable.\medskip
\item How do you enforce privacy  in something as big as Google
or complex as Facebook? Nobody knows.\bigskip

Similarly, big databases imposed by government
\end{itemize}


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\mode<presentation>{
\begin{frame}[c]
\frametitle{\begin{tabular}{@{}c@{}}Next Week\end{tabular}}

Homework: Which areas should I focus on?


\end{frame}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

\end{document}

%%% Local Variables:  
%%% mode: latex
%%% TeX-master: t
%%% End: