slides/slides07.tex
changeset 305 851a22e8d570
parent 304 9d1cf03d1b32
child 381 036a762b02cf
equal deleted inserted replaced
304:9d1cf03d1b32 305:851a22e8d570
     4 % beamer stuff 
     4 % beamer stuff 
     5 \renewcommand{\slidecaption}{APP 07, King's College London}
     5 \renewcommand{\slidecaption}{APP 07, King's College London}
     6 \newcommand{\bl}[1]{\textcolor{blue}{#1}}
     6 \newcommand{\bl}[1]{\textcolor{blue}{#1}}
     7 
     7 
     8 \begin{document}
     8 \begin{document}
       
     9 
       
    10 %% Differential privacy
       
    11 %% http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
     9 
    12 
    10 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    13 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    11 \begin{frame}[t]
    14 \begin{frame}[t]
    12 \frametitle{%
    15 \frametitle{%
    13   \begin{tabular}{@ {}c@ {}}
    16   \begin{tabular}{@ {}c@ {}}
    16   \LARGE Privacy Policies (7)\\[-6mm] 
    19   \LARGE Privacy Policies (7)\\[-6mm] 
    17   \end{tabular}}\bigskip\bigskip\bigskip
    20   \end{tabular}}\bigskip\bigskip\bigskip
    18 
    21 
    19   \normalsize
    22   \normalsize
    20   \begin{center}
    23   \begin{center}
    21   \begin{tabular}{ll}
    24   \begin{tabular}{ll}
    22   Email:  & christian.urban at kcl.ac.uk\\
    25   Email:  & christian.urban at kcl.ac.uk\\
    23   Office: & S1.27 (1st floor Strand Building)\\
    26   Office: & S1.27 (1st floor Strand Building)\\
    24   Slides: & KEATS (also homework is there)\\
    27   Slides: & KEATS (also homework is there)\\
    25   \end{tabular}
    28   \end{tabular}
    26   \end{center}
    29   \end{center}
   271 \begin{itemize}
   274 \begin{itemize}
   272 \item 15 Million active users in the UK; 500M tweets per day
   275 \item 15 Million active users in the UK; 500M tweets per day
   273 \item a company called Datasift is allowed to mine all tweets since 2010
   276 \item a company called Datasift is allowed to mine all tweets since 2010
   274 \item they charge 10k per month for other companies to target advertisement
   277 \item they charge 10k per month for other companies to target advertisement
   275 \end{itemize}
   278 \end{itemize}
   276 \end{itemize}\pause
   279 \end{itemize}
   277 
   280 
   278 
   281 
   279 \end{frame}
   282 \end{frame}
   280 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   283 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   281 
   284 
   363 \begin{frame}[c]
   366 \begin{frame}[c]
   364 \frametitle{Re-identification Attacks}
   367 \frametitle{Re-identification Attacks}
   365 
   368 
   366 
   369 
   367 \begin{itemize}
   370 \begin{itemize}
   368 \item in 1990 medical databases were made public with names removed, but  birth dates, 
   371 
   369 gender, ZIP-code were retained\medskip
   372 \item in 1990 medical databases were routinely made public
   370 \item could be cross referenced with public voter registration data in order to find out what the
   373       with names removed, but birth dates, gender, ZIP-code
   371 medical record of the governor of Massachusetts was (in 1997 Latanya Sweeney)
   374       were retained\medskip
       
   375 
       
   376 \item could be cross referenced with public voter registration
       
   377       data in order to find out what the medical record of the
       
   378       governor of Massachusetts was (in 1997 Latanya Sweeney)
       
   379 
   372 \end{itemize}
   380 \end{itemize}
   373 
   381 
   374 \end{frame}}
   382 \end{frame}}
   375 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   383 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   376 
   384 
   403 
   411 
   404 \begin{itemize}
   412 \begin{itemize}
   405 \item In 2006, AOL published 20 million Web search queries 
   413 \item In 2006, AOL published 20 million Web search queries 
   406   collected from 650,000 users (names had been deleted)\medskip
   414   collected from 650,000 users (names had been deleted)\medskip
   407   
   415   
   408 \item \ldots{}within days an old lady, Thelma Arnold, from 
   416 \item \ldots{}within days an old lady, Thelma Arnold, from
   409   Lilburn, Georgia, was identified as user No.~4417749\medskip
   417       Lilburn, Georgia, (11,596 inhabitants) was identified as
       
   418       user No.~4417749\medskip
   410   
   419   
   411 \item some of the queries that gave her away:  
   420 \item some of the queries that gave her away:  
   412 \begin{itemize}
   421 \begin{itemize}
   413 \item landscapers in Lilburn, Ga
   422 \item landscapers in Lilburn, Ga
   414 \item 60 single men
   423 \item 60 single men
   446 
   455 
   447 \end{frame}
   456 \end{frame}
   448 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   457 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   449 
   458 
   450 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   459 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   451 \mode<presentation>{
   460 \begin{frame}[c]
   452 \begin{frame}<2>[c]
       
   453 \frametitle{\Large We cannot exclude all Harm}
   461 \frametitle{\Large We cannot exclude all Harm}
   454 
   462 
   455 \begin{itemize}
   463 \begin{itemize}
   456 \item Analysis of a given data set teaches us that smoking causes cancer. 
   464 \item Analysis of a given data set teaches us that smoking causes cancer. 
   457 Mary, a smoker, is harmed by this analysis: her insurance premiums rise. 
   465 Mary, a smoker, is harmed by this analysis: her insurance premiums rise. 
   459 Mary is harmed by the finding that smoking causes cancer.\bigskip
   467 Mary is harmed by the finding that smoking causes cancer.\bigskip
   460 
   468 
   461 \item \ldots of course she is also helped; she might quit smoking
   469 \item \ldots of course she is also helped; she might quit smoking
   462 \end{itemize}
   470 \end{itemize}
   463 
   471 
   464 \end{frame}}
   472 \end{frame}
   465 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   473 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   474      
       
   475 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   476 \begin{frame}[c]
       
   477 \frametitle{\Large We cannot exclude all Harm}
       
   478 
       
   479 Supervising queries will also not work in general:
       
   480 
       
   481 \begin{itemize}
       
   482 \item denying a request can already disclose information
       
   483 
       
   484 \item in general it is not decidable, whether a sequence
       
   485   of queries can identify a person
       
   486 \end{itemize}
       
   487 
       
   488 \end{frame}
       
   489 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   490      
       
   491      
   466      
   492      
   467 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   493 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   468 \mode<presentation>{
   494 \mode<presentation>{
   469 \begin{frame}<2>[c]
   495 \begin{frame}<2>[c]
   470 \frametitle{Differential Privacy}
   496 \frametitle{Differential Privacy}
   535 
   561 
   536 \end{frame}}
   562 \end{frame}}
   537 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   563 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   538 
   564 
   539 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   540 \mode<presentation>{
       
   541 \begin{frame}[c]
   566 \begin{frame}[c]
   542 \frametitle{Adding Noise}
   567 \frametitle{Adding Noise}
   543 
   568 
   544 Adding noise is not as trivial as one would wish:
   569 Adding noise is not as trivial as one would wish:
   545 
   570 
   561 \bigskip
   586 \bigskip
   562 \item But if I ask five questions for all the dataset (has the disease, is male, below 30, \ldots),
   587 \item But if I ask five questions for all the dataset (has the disease, is male, below 30, \ldots),
   563 then one individual can change the dataset by \bl{$5$}
   588 then one individual can change the dataset by \bl{$5$}
   564 \end{itemize}
   589 \end{itemize}
   565 
   590 
   566 \end{frame}}
   591 \end{frame}
   567 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   592 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   568 
   593 
   569 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   594 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   570 \mode<presentation>{
   595 \begin{frame}[c]
   571 \begin{frame}[t]
   596 \frametitle{\Large Differential Privacy Problems}
   572 \frametitle{\begin{tabular}{@{}c@{}}Tor (private web browsing)\end{tabular}}
   597 
   573 
   598 \begin{itemize}
   574 \begin{itemize}
   599 \item How to do differential privacy ``offline'' is still
   575 \item initially developed by US Navy Labs, but then opened up to the world 
   600 an active research question
   576 \item network of proxy nodes
   601 
   577 \item a Tor client establishes a ``random'' path to the destination server (you cannot trace back where the information came from)\bigskip\pause
   602 \item What constitutes a single entry in the database?
   578 \end{itemize}
   603 
   579 
   604 \item Evolution of a database:
   580 \only<2>{
   605 \end{itemize}
   581 \begin{itemize}
   606 
   582 \item malicious exit node attack: someone set up 5 Tor exit nodes and monitored the traffic:
   607 \begin{center}\small
   583 \begin{itemize}
   608 \begin{tabular}{l|ll}
   584 \item a number of logons and passwords used by embassies (Uzbekistan `s1e7u0l7c', while
   609 Name	 & Has the disease?\\\hline
   585 Tunisia `Tunesia' and India `1234')
   610 Alice          & yes\\ 
   586 \end{itemize}
   611 Bob     	 & no\\
   587 \end{itemize}}
   612 Charlie	 & yes\\
   588 \only<3>{
   613 Eve	         & no\\
   589 \begin{itemize}
   614 Chandler	 & yes\\
   590 \item bad apple attack: if you have one insecure application, your IP can be tracked through Tor
   615 Marc       & yes & $\Leftarrow$ new entry\\
   591 \begin{itemize}
   616 \end{tabular}
   592 \item background: 40\% of traffic on Tor is generated by BitTorrent
   617 \end{center}
   593 \end{itemize}
   618 
   594 \end{itemize}}
   619 \end{frame}
   595 
   620 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   596 
   621 
   597 \end{frame}}
   622 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   598 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   623 \begin{frame}[c]
   599 
   624 \frametitle{Tor}
   600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   625 
   601 \mode<presentation>{
   626 \begin{center}
   602 \begin{frame}[c]
   627 ??
   603 \frametitle{Tor Nodes}
   628 \end{center}
   604 
   629 \end{frame}
   605 Dan Egerstad wrote:\bigskip
   630 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
   606 
   631 
   607 \it ``If you actually look in to where these Tor nodes are hosted and how big they are, some of these nodes cost thousands of dollars each month just to host because they're using lots of bandwidth, they're heavy-duty servers and so on. Who would pay for this and be anonymous?''
   632 
   608 
   633 
   609 
       
   610 \end{frame}}
       
   611 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   612 
       
   613 
       
   614 
       
   615 
       
   616 
       
   617 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   618 \begin{frame}[t]
       
   619 \frametitle{\begin{tabular}{@{}c@{}}Skype\end{tabular}}
       
   620 
       
   621 \begin{itemize}
       
   622 \item Skype used to be known as a secure online communication (encryption cannot be disabled), 
       
   623 but \ldots\medskip
       
   624 
       
   625 \item it is impossible to verify whether crypto algorithms are correctly used, or whether  there are backdoors.\bigskip
       
   626  
       
   627 \item recently someone found out that you can reset the password of somebody else's
       
   628 account, only knowing their email address (Skype needed to suspend the password reset feature temporarily)
       
   629 \end{itemize}
       
   630 
       
   631 
       
   632 \end{frame}
       
   633 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       
   634      
   634      
   635 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   635 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   636 \begin{frame}[c]
   636 \begin{frame}[c]
   637 \frametitle{Take Home Point}
   637 \frametitle{Take Home Point}
   638 
   638