slides01.tex
changeset 15 cd0ceaf89c1d
child 16 c51178fa85fe
equal deleted inserted replaced
14:610f14009c0b 15:cd0ceaf89c1d
       
     1 \documentclass[dvipsnames,14pt,t,xelatex]{beamer}
       
     2 \usepackage{./slides}
       
     3 \usepackage{./graph}
       
     4 \usepackage{./langs}
       
     5 \usepackage{./data}
       
     6 
       
     7 \hfuzz=220pt 
       
     8 
       
     9 \lstset{language=Scala,
       
    10         style=mystyle,
       
    11         numbersep=0pt,
       
    12         numbers=none,
       
    13         xleftmargin=0mm}
       
    14 
       
    15 \newcommand{\bl}[1]{\textcolor{blue}{#1}}     
       
    16  
       
    17 % beamer stuff 
       
    18 \renewcommand{\slidecaption}{CFL 01, King's College London}
       
    19 
       
    20 
       
    21 \begin{filecontents}{re-python2.data}
       
    22 1 0.033
       
    23 5 0.036
       
    24 10 0.034
       
    25 15 0.036
       
    26 18 0.059
       
    27 19 0.084 
       
    28 20 0.141
       
    29 21 0.248
       
    30 22 0.485
       
    31 23 0.878
       
    32 24 1.71
       
    33 25 3.40
       
    34 26 7.08
       
    35 27 14.12
       
    36 28 26.69
       
    37 \end{filecontents}
       
    38 
       
    39 \begin{filecontents}{re-java.data}
       
    40 5  0.00298
       
    41 10  0.00418
       
    42 15  0.00996
       
    43 16  0.01710
       
    44 17  0.03492
       
    45 18  0.03303
       
    46 19  0.05084
       
    47 20  0.10177
       
    48 21  0.19960
       
    49 22  0.41159
       
    50 23  0.82234
       
    51 24  1.70251
       
    52 25  3.36112
       
    53 26  6.63998
       
    54 27  13.35120
       
    55 28  29.81185
       
    56 \end{filecontents} 
       
    57 
       
    58 \begin{filecontents}{re-java9.data}
       
    59 1000  0.01410
       
    60 2000  0.04882
       
    61 3000  0.10609
       
    62 4000  0.17456
       
    63 5000  0.27530
       
    64 6000  0.41116
       
    65 7000  0.53741
       
    66 8000  0.70261
       
    67 9000  0.93981
       
    68 10000 0.97419
       
    69 11000 1.28697
       
    70 12000 1.51387
       
    71 14000 2.07079
       
    72 16000 2.69846
       
    73 20000 4.41823
       
    74 24000 6.46077
       
    75 26000 7.64373
       
    76 30000 9.99446
       
    77 34000 12.966885
       
    78 38000 16.281621
       
    79 42000 19.180228
       
    80 46000 21.984721
       
    81 50000 26.950203
       
    82 60000 43.0327746
       
    83 \end{filecontents}
       
    84 
       
    85 
       
    86 
       
    87 \begin{document}
       
    88 
       
    89 
       
    90 
       
    91 
       
    92 
       
    93 
       
    94 
       
    95 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
    96 \begin{frame}[t]
       
    97 \frametitle{%  
       
    98   \begin{tabular}{@ {}c@ {}}
       
    99   \\[-3mm]
       
   100   \LARGE Compilers and \\[-1mm] 
       
   101   \LARGE Formal Languages (1)\\[-3mm] 
       
   102   \end{tabular}}
       
   103 
       
   104   \begin{center}
       
   105   %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
       
   106   %\includegraphics[scale=0.31]{pics/ante2.jpg}\\
       
   107   %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
       
   108   \end{center}
       
   109 
       
   110   \normalsize
       
   111   \begin{center}
       
   112   \begin{tabular}{ll}
       
   113   Email:  & christian.urban at kcl.ac.uk\\
       
   114   Office: & N\liningnums{7.07} (North Wing, Bush House)\\
       
   115   Slides: & KEATS
       
   116   \end{tabular}
       
   117   \end{center}
       
   118 
       
   119 \end{frame}
       
   120 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   121 
       
   122 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   123 \begin{frame}[t]
       
   124 \frametitle{Why Study Compilers?}
       
   125 
       
   126 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\
       
   127 
       
   128 \begin{bubble}[10.5cm]
       
   129   \bf ``\ldots{}It’s effectively a perpetual
       
   130   employment act for solid compiler hackers.''
       
   131 \end{bubble}
       
   132 
       
   133 \onslide<1->{
       
   134 \only<2>{
       
   135 \begin{itemize}
       
   136 \item {\bf Hardware is getting weirder
       
   137   rather than getting clocked faster}
       
   138 
       
   139 \begin{itemize}
       
   140 \item  Almost all processors are
       
   141   multicores nowadays and it looks like there is increasing asymmetry in
       
   142   resources across cores. Processors come with vector units, crypto
       
   143   accelerators etc. We have DSPs, GPUs,
       
   144   ARM big.little, and Xeon Phi. This is only scratching the
       
   145   surface.
       
   146 \end{itemize}  
       
   147 \end{itemize}}
       
   148 \only<3>{
       
   149 \begin{itemize}
       
   150 \item {\bf We’re getting tired of low-level languages and
       
   151     their associated security disasters}
       
   152   
       
   153 \begin{itemize}
       
   154 \item 
       
   155   We want to write new code, to
       
   156   whatever extent possible, in safer, higher-level
       
   157   languages. Compilers are caught right in the middle of these
       
   158   opposing trends: one of their main jobs is to help bridge the large
       
   159   and growing gap between increasingly high-level languages and
       
   160   increasingly wacky platforms.
       
   161 \end{itemize}  
       
   162 \end{itemize}}}
       
   163 
       
   164 \end{frame}
       
   165 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   166 
       
   167 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   168 \begin{frame}[c]
       
   169 \frametitle{Why Bother?}
       
   170 
       
   171 \begin{columns}[t]
       
   172 \begin{column}{.5\textwidth}
       
   173 Ruby, Python, Java 8\medskip\\
       
   174 \begin{tikzpicture}\footnotesize
       
   175 \begin{axis}[
       
   176     xlabel={$n$},
       
   177     x label style={at={(1.05,0.0)}},
       
   178     ylabel={time in secs},
       
   179     enlargelimits=false,
       
   180     xtick={0,5,...,30},
       
   181     xmax=33,
       
   182     ymax=35,
       
   183     ytick={0,5,...,30},
       
   184     scaled ticks=false,
       
   185     axis lines=left,
       
   186     width=5.5cm,
       
   187     height=4cm, 
       
   188     legend entries={Python,Ruby},  
       
   189     legend pos=north west,
       
   190     legend cell align=left]
       
   191 \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
       
   192 \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
       
   193 \end{axis}
       
   194 \end{tikzpicture}
       
   195 \begin{tikzpicture}\footnotesize
       
   196 \begin{axis}[
       
   197     xlabel={$n$},
       
   198     x label style={at={(1.05,0.0)}},
       
   199     ylabel={time in secs},
       
   200     enlargelimits=false,
       
   201     xtick={0,5,...,30},
       
   202     xmax=33,
       
   203     ymax=35,
       
   204     ytick={0,5,...,30},
       
   205     scaled ticks=false,
       
   206     axis lines=left,
       
   207     width=5.5cm,
       
   208     height=4cm, 
       
   209     legend entries={Python, Java 8},  
       
   210     legend pos=north west,
       
   211     legend cell align=left]
       
   212 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};   
       
   213 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   214 \end{axis}
       
   215 \end{tikzpicture}
       
   216 
       
   217 \end{column}
       
   218 \begin{column}{.5\textwidth}
       
   219 Us (after next lecture)\medskip\\
       
   220 \begin{tikzpicture}\footnotesize
       
   221 \begin{axis}[
       
   222     xlabel={$n$},
       
   223     x label style={at={(1.07,0.0)}},
       
   224     ylabel={time in secs},
       
   225     enlargelimits=false,
       
   226     xtick={0,5000,...,10000},
       
   227     xmax=11000,
       
   228     ymax=35,
       
   229     ytick={0,5,...,30},
       
   230     scaled ticks=false,
       
   231     axis lines=left,
       
   232     width=5.5cm,
       
   233     height=4cm]
       
   234 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   235 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
       
   236 \end{axis}
       
   237 \end{tikzpicture}
       
   238 \begin{tikzpicture}\footnotesize
       
   239 \begin{axis}[
       
   240     xlabel={$n$},
       
   241     x label style={at={(1.07,0.0)}},
       
   242     ylabel={time in secs},
       
   243     enlargelimits=false,
       
   244     ymax=35,
       
   245     ytick={0,5,...,30},
       
   246     scaled ticks=false,
       
   247     axis lines=left,
       
   248     width=5.5cm,
       
   249     height=4cm]
       
   250 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
       
   251 \end{axis}
       
   252 \end{tikzpicture}
       
   253 \end{column}
       
   254 \end{columns}\bigskip
       
   255 
       
   256 \small\centering
       
   257 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
       
   258 against $\underbrace{\texttt{a}...\texttt{a}}_n$
       
   259 \end{frame} 
       
   260 
       
   261 \begin{frame}
       
   262 \begin{column}{.5\textwidth}
       
   263 \begin{tikzpicture}
       
   264 \begin{axis}[
       
   265     xlabel={$n$},
       
   266     x label style={at={(1.05,0.0)}},
       
   267     ylabel={time in secs},
       
   268     y label style={at={(0.06,0.5)}},
       
   269     enlargelimits=false,
       
   270     xtick={0,5,...,30},
       
   271     xmax=33,
       
   272     ymax=45,
       
   273     ytick={0,10,...,40},
       
   274     scaled ticks=false,
       
   275     axis lines=left,
       
   276     width=6cm,
       
   277     height=4.5cm, 
       
   278     legend entries={Python, Java 8},  
       
   279     legend pos=north west]
       
   280 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
       
   281 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   282 \end{axis}
       
   283 \end{tikzpicture}
       
   284 \begin{tikzpicture}
       
   285 \begin{axis}[
       
   286     xlabel={$n$},
       
   287     x label style={at={(1.05,0.0)}},
       
   288     ylabel={time in secs},
       
   289     y label style={at={(0.06,0.5)}},
       
   290     %enlargelimits=false,
       
   291     %xtick={0,5000,...,30000},
       
   292     xmax=65000,
       
   293     ymax=45,
       
   294     ytick={0,10,...,40},
       
   295     scaled ticks=false,
       
   296     axis lines=left,
       
   297     width=6cm,
       
   298     height=4.5cm, 
       
   299     legend entries={Java 9},  
       
   300     legend pos=north west]
       
   301 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
       
   302 \end{axis}
       
   303 \end{tikzpicture}
       
   304 \end{column}
       
   305 \end{frame}
       
   306 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   307 
       
   308 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   309 \begin{frame}[c]
       
   310 \frametitle{Evil Regular Expressions}
       
   311 
       
   312 \begin{itemize}
       
   313 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
       
   314 \item Evil regular expressions\medskip
       
   315 \begin{itemize}
       
   316 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
       
   317 \item \bl{$(a^*)^*\cdot b$}
       
   318 \item \bl{$([a$\,-\,$z]^+)^*$}
       
   319 \item \bl{$(a + a \cdot a)^*$}
       
   320 \item \bl{$(a + a^?)^*$}
       
   321 \end{itemize}
       
   322 
       
   323 \item sometimes also called \alert{catastrophic backtracking}
       
   324 \item this is a problem for \alert{N}etwork \alert{I}ntrusion
       
   325   \alert{D}etection systems, StackExchange, Atom editor
       
   326 \item \url{https://vimeo.com/112065252}  
       
   327 \end{itemize}
       
   328 
       
   329 \end{frame}
       
   330 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   331 
       
   332 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   333 \begin{frame}[c]
       
   334 \frametitle{The Goal of this Module}
       
   335 
       
   336 \begin{center}
       
   337   \begin{tikzpicture}[scale=1,
       
   338                       node/.style={
       
   339                       rectangle,rounded corners=3mm,
       
   340                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
       
   341                       top color=white,bottom color=black!20}]
       
   342 
       
   343   \node at (3.05, 1.8) {\Large\bf write a compiler};
       
   344 
       
   345   \node (0) at (-2.3,0) {};  
       
   346   \node [above=5mm of 0]
       
   347   {\makebox[0mm]{\footnotesize
       
   348       \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; 
       
   349      
       
   350   \node (A) at (0,0)  [node] {};
       
   351   \node [below right] at (A.north west) {lexer};
       
   352 
       
   353   \node (B) at (3,0)  [node] {};
       
   354   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
       
   355 
       
   356   \node (C) at (6,0)  [node] {};
       
   357   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
       
   358 
       
   359   \node (1) at (8.4,0) {};
       
   360   \node [above=5mm of 1]
       
   361   {\makebox[0mm]{\footnotesize
       
   362       \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
       
   363 
       
   364   \draw [->,line width=4mm] (0) -- (A); 
       
   365   \draw [->,line width=4mm] (A) -- (B); 
       
   366   \draw [->,line width=4mm] (B) -- (C); 
       
   367   \draw [->,line width=4mm] (C) -- (1); 
       
   368   \end{tikzpicture}
       
   369   \end{center}
       
   370 
       
   371 \only<2,3,4>{
       
   372 \begin{textblock}{1}(1,2.1)
       
   373 \begin{bubble}[9.8cm]
       
   374 \normalsize
       
   375 lexer input: a string\smallskip\\
       
   376 \hspace{5mm}\code{"read(n);"}\medskip\\
       
   377 lexer output: a sequence of tokens\smallskip\\
       
   378 \hspace{5mm}\code{key(read) lpar id(n) rpar semi}
       
   379 \end{bubble}
       
   380 \end{textblock}}
       
   381 
       
   382 \only<3,4>{
       
   383 \begin{textblock}{1}(6,7.8)
       
   384 \begin{tabular}{c}
       
   385 \includegraphics[scale=0.2]{rosetta.jpg}\\[-2mm]
       
   386 \footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
       
   387 \end{tabular}
       
   388 \end{textblock}}
       
   389 
       
   390 \only<4>{
       
   391 \begin{textblock}{1}(0.5,12)\small
       
   392 \begin{tabular}{l@{}c@{}l}
       
   393   \pcode{if}    & $\;\Rightarrow\;$ & keyword\\
       
   394   \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
       
   395 \end{tabular}  
       
   396 \end{textblock}}
       
   397 
       
   398 \only<5>{
       
   399 \begin{textblock}{1}(1,1.5)
       
   400 \begin{bubble}[8.5cm]
       
   401 \normalsize
       
   402 parser input: a sequence of tokens\smallskip\\
       
   403 
       
   404 {\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\
       
   405 
       
   406 parser output: an abstract syntax tree\smallskip\\
       
   407 \footnotesize
       
   408 \hspace{2cm}\begin{tikzpicture}
       
   409   \node {\code{read}}
       
   410     child {node {\code{lpar}}}
       
   411     child {node {\code{n}}}
       
   412     child {node {\code{rpar}}};
       
   413 \end{tikzpicture}
       
   414 \end{bubble}
       
   415 \end{textblock}}
       
   416 
       
   417 \only<6,7>{
       
   418 \begin{textblock}{1}(1,1.5)
       
   419 \begin{bubble}[4cm]
       
   420 \normalsize
       
   421 code generator:\smallskip\\
       
   422 \hspace{5mm}\code{istore 2}\\ 
       
   423 \hspace{5mm}\code{iload 2}\\ 
       
   424 \hspace{5mm}\code{ldc 10}\\
       
   425 \hspace{5mm}\code{isub}\\
       
   426 \hspace{5mm}\code{ifeq Label2}\\ 
       
   427 \hspace{5mm}\code{iload 2}\\
       
   428 \hspace{5mm}\code{...}\\
       
   429 \end{bubble}
       
   430 \end{textblock}}
       
   431 
       
   432 \only<7>{
       
   433 \begin{textblock}{6}(8.4,7)
       
   434 \begin{bubble}[5cm]
       
   435 \mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
       
   436 \begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
       
   437     xlabel=n,
       
   438     enlargelimits=0.05,
       
   439     ybar interval=0.7, legend style=small]
       
   440 \addplot file {interpreted2.data};
       
   441 \addplot file {compiled2.data};
       
   442 %\legend{interpreted, compiled}
       
   443 \end{axis}
       
   444 \end{tikzpicture}}
       
   445 \end{bubble}
       
   446 \end{textblock}}
       
   447 
       
   448 \end{frame}
       
   449 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
       
   450 
       
   451 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   452 \begin{frame}[c]
       
   453 \frametitle{The Acad.~Subject is Mature}
       
   454 
       
   455 \begin{itemize}
       
   456 \item Turing Machines, 1936
       
   457 \item Regular Expressions, 1956\\
       
   458 \item The first compiler for COBOL, 1957\\ (Grace Hopper)
       
   459 \item But surprisingly research papers are still published nowadays\\
       
   460 \item ``Parsing: The Solved Problem That Isn't''
       
   461 \end{itemize}
       
   462 
       
   463 \begin{flushright}
       
   464 \includegraphics[scale=0.3]{hopper.jpg}\\
       
   465 \footnotesize\textcolor{gray}{Grace Hopper}
       
   466 \end{flushright}
       
   467 
       
   468 
       
   469 \begin{flushright}
       
   470 \mbox{}\\[-6mm]
       
   471 {\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show,\\[-2mm]
       
   472  \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}}
       
   473 \end{flushright}
       
   474 
       
   475 \end{frame}
       
   476 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
       
   477 
       
   478 
       
   479 
       
   480 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   481 \begin{frame}[c]
       
   482 \frametitle{Lectures 1 - 5}
       
   483 
       
   484 transforming strings into structured data\\[10mm]
       
   485 
       
   486 {\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
       
   487 \hspace{5mm}(recognising ``words'')\\[6mm]
       
   488 
       
   489 {\LARGE\bf Parsing}\medskip\\
       
   490 \hspace{5mm}(recognising ``sentences'')
       
   491 
       
   492 \begin{textblock}{1}(10,9.1)
       
   493 \begin{tabular}{c}
       
   494 \includegraphics[scale=0.1]{rosetta.jpg}\\[-2mm]
       
   495 \footnotesize Stone of Rosetta
       
   496 \end{tabular}
       
   497 \end{textblock}
       
   498 
       
   499 \end{frame}
       
   500 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   501 
       
   502 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   503 \begin{frame}[t]
       
   504 \frametitle{Familiar Regular Expr.}
       
   505 \small
       
   506 \begin{center}
       
   507 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
       
   508 \end{center}\smallskip
       
   509 
       
   510 \begin{center}
       
   511 \begin{tabular}{@{}lp{8.5cm}@{}}
       
   512 \pcode{re*} & matches 0 or more times\\
       
   513 \pcode{re+} & matches 1 or more times\\
       
   514 \pcode{re?} & matches 0 or 1 times\\
       
   515 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
       
   516 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
       
   517 \pcode{[...]} & matches any single character inside the brackets\\
       
   518 \pcode{[^...]} & matches any single character not inside the 
       
   519 brackets\\
       
   520 \pcode{a-z A-Z} & character ranges\\
       
   521 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
       
   522 \pcode{.} & matches every character except newline\\
       
   523 \pcode{(re)}	& groups regular expressions and remembers 
       
   524 the matched text
       
   525 \end{tabular}
       
   526 \end{center}
       
   527 
       
   528 
       
   529 \end{frame}
       
   530 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   531 
       
   532 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   533 \begin{frame}[c]
       
   534 \frametitle{Today}
       
   535 
       
   536 \begin{itemize}
       
   537 \item While the ultimate goal is to implement a small compiler for the JVM
       
   538   \ldots\bigskip
       
   539 \end{itemize}
       
   540 
       
   541 Let's start with:
       
   542 
       
   543 \begin{itemize}
       
   544 \item a web-crawler
       
   545 \item an email harvester
       
   546 %\item \textcolor{gray}{(a web-scraper)}
       
   547 \end{itemize}
       
   548 
       
   549 \end{frame}
       
   550 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   551 
       
   552 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   553 \begin{frame}[t]
       
   554 \frametitle{A Web-Crawler}
       
   555 
       
   556 \mbox{}\\[10mm]
       
   557 
       
   558 \begin{enumerate}
       
   559 \item given an URL, read the corresponding webpage
       
   560 \item extract all links from it
       
   561 \item call the web-crawler again for all these links
       
   562 \end{enumerate}
       
   563 
       
   564 \end{frame}
       
   565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   566 
       
   567 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   568 \begin{frame}[t]
       
   569 \frametitle{A Web-Crawler}
       
   570 
       
   571 \mbox{}\\[10mm]
       
   572 
       
   573 
       
   574 \begin{enumerate}
       
   575 \item given an URL, read the corresponding webpage
       
   576 \item if not possible print, out a problem
       
   577 \item if possible, extract all links from it
       
   578 \item call the web-crawler again for all these links
       
   579 \end{enumerate}\bigskip\pause
       
   580 
       
   581 \small (we need a bound for the number of recursive calls)
       
   582 
       
   583 \small (the purpose is to check all links on my own webpage)
       
   584 \end{frame}
       
   585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   586 
       
   587 
       
   588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   589 \begin{frame}[c]
       
   590 
       
   591 \begin{textblock}{1}(2,5)
       
   592 \begin{tabular}{c}
       
   593 \includegraphics[scale=0.15]{servers.png}\\[-2mm]
       
   594 \small Server
       
   595 \end{tabular}
       
   596 \end{textblock}
       
   597 
       
   598 \begin{textblock}{1}(5.6,4)
       
   599   \begin{tikzpicture}[scale=1.1]
       
   600   \draw[white] (0,1) node (X) {};
       
   601   \draw[white] (2,1) node (Y) {};
       
   602    \draw[white] (0,0) node (X1) {};
       
   603   \draw[white] (2,0) node (Y1) {};
       
   604    \draw[white] (0,-1) node (X2) {};
       
   605   \draw[white] (2,-1) node (Y2) {};
       
   606   \draw[red, <-, line width = 2mm] (X) -- (Y);
       
   607   \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
       
   608   \draw[red, ->, line width = 2mm] (X1) -- (Y1);
       
   609   \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
       
   610   \draw[red, <-, line width = 2mm] (X2) -- (Y2);
       
   611   \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
       
   612   \end{tikzpicture}
       
   613 \end{textblock}
       
   614 
       
   615 
       
   616 \begin{textblock}{1}(9,5.5)
       
   617 \begin{tabular}{c}
       
   618 \includegraphics[scale=0.15]{laptop.png}\\[-2mm]
       
   619 \small Browser
       
   620 \end{tabular}
       
   621 \end{textblock}
       
   622 \end{frame}
       
   623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   624 
       
   625   
       
   626 
       
   627  
       
   628 
       
   629 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   630 \begin{frame}[t]
       
   631 \frametitle{A Regular Expression}
       
   632 
       
   633 \begin{itemize}
       
   634 \item \ldots{} is a pattern or template for specifying strings
       
   635 \end{itemize}\bigskip
       
   636   
       
   637 \begin{center}  
       
   638 \only<1>{\scode{"https?://[^"]*"}}%
       
   639 \only<2>{\scode{""""https?://[^"]*"""".r}}
       
   640 \end{center}\bigskip\bigskip
       
   641 
       
   642 matches for example\smallskip\\  
       
   643 \hspace{2mm}\code{"http://www.foobar.com"}\\
       
   644 \hspace{2mm}\code{"https://www.tls.org"}\smallskip\\
       
   645 
       
   646 but not\smallskip\\  
       
   647 \hspace{2mm}\code{"http://www."foo"bar.com"}\\
       
   648 
       
   649 \end{frame}
       
   650 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   651 
       
   652 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   653 \begin{frame}[c]
       
   654 \frametitle{Finding Operations in Scala}
       
   655 
       
   656 {\bf\code{rexp.findAllIn(string)}}\medskip
       
   657   
       
   658 returns a list of all (sub)strings that match the 
       
   659 regular expression
       
   660 \bigskip\bigskip  
       
   661   
       
   662 
       
   663 {\bf\code{rexp.findFirstIn(string)}}\medskip
       
   664  
       
   665 returns either 
       
   666 
       
   667 \begin{itemize}
       
   668 \item \code{None} if no (sub)string matches or 
       
   669 \item \code{Some(s)} with the first (sub)string
       
   670 \end{itemize}
       
   671 
       
   672 \end{frame}
       
   673 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   674 
       
   675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   676 \begin{frame}[t]
       
   677 \frametitle{Regular Expressions}
       
   678 
       
   679 Their inductive definition:
       
   680 
       
   681 
       
   682 \begin{textblock}{6}(2,7.5)
       
   683   \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
       
   684   \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
       
   685          & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} / $[]$\\
       
   686          & \bl{$\mid$} & \bl{$c$}                         & character\\
       
   687          & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
       
   688          & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
       
   689          & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
       
   690   \end{tabular}
       
   691   \end{textblock}
       
   692   
       
   693   
       
   694   
       
   695 \end{frame}
       
   696 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   697 
       
   698 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   699 %\begin{frame}[t]
       
   700 %\frametitle{Regular Expressions}
       
   701 %
       
   702 %\small
       
   703 %In Scala:\bigskip
       
   704 %
       
   705 %\footnotesize
       
   706 %\lstinputlisting{../progs/app51.scala}
       
   707 %
       
   708 %  
       
   709 %\end{frame}
       
   710 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   711 
       
   712 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   713 \begin{frame}[t]
       
   714 \frametitle{Strings}
       
   715 
       
   716 \ldots are lists of characters. For example \code{"hello"}
       
   717 
       
   718 \begin{center}
       
   719 \bl{$[h, e, l, l, o]$} or just \bl{$hello$}
       
   720 \end{center}
       
   721 
       
   722 the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\
       
   723 
       
   724 the concatenation of two strings:
       
   725 
       
   726 \begin{center}
       
   727 \bl{$s_1 \,@\, s_2$}
       
   728 \end{center}
       
   729 
       
   730 \bl{\textit{foo $@$ bar = foobar}, \textit{baz $@\, []$ = baz}}
       
   731   
       
   732 \end{frame}
       
   733 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   734 
       
   735 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   736 \begin{frame}[c]
       
   737 \frametitle{Languages, Strings}
       
   738 
       
   739 \begin{itemize}
       
   740 \item \alert{\bf Strings} are lists of characters, for example
       
   741 \begin{center}
       
   742 \bl{$[]$},\;\bl{$abc$}  \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
       
   743 \end{center}\bigskip
       
   744 
       
   745 
       
   746 \item A \alert{\bf language} is a set of strings, for example\medskip
       
   747 \begin{center}
       
   748 \bl{$\{[], hello, \textit{foobar}, a, abc\}$}
       
   749 \end{center}\bigskip
       
   750 
       
   751 \item \alert{\bf Concatenation} of strings and languages
       
   752 
       
   753 \begin{center}
       
   754 \begin{tabular}{rcl}
       
   755 \bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
       
   756 \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
       
   757 \end{tabular}
       
   758 \end{center}
       
   759 
       
   760 %\item The \alert{\bf meaning} of a regular expression is a set of 
       
   761 %  strings, or language.
       
   762 \end{itemize}  
       
   763 
       
   764 \end{frame}
       
   765 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   766 
       
   767 
       
   768 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   769 \mode<presentation>{
       
   770 \begin{frame}[c]
       
   771 \frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] 
       
   772   Regular Expression\end{tabular}}
       
   773 
       
   774 \begin{textblock}{15}(1,4)
       
   775  \begin{tabular}{rcl}
       
   776  \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
       
   777  \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
       
   778  \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
       
   779  \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
       
   780  \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
       
   781  \bl{$L(r^*)$}           & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
       
   782   \end{tabular}\bigskip
       
   783   
       
   784 \onslide<2->{
       
   785 \hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
       
   786 \bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
       
   787 \small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
       
   788 }  
       
   789     \end{textblock}
       
   790 
       
   791 \end{frame}}
       
   792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   793 
       
   794 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   795 \begin{frame}[c]
       
   796 \frametitle{The Meaning of Matching}
       
   797 
       
   798 \begin{bubble}[10cm]
       
   799 \large\bf 
       
   800 A regular expression \bl{$r$} matches a string~\bl{$s$} 
       
   801 provided
       
   802 
       
   803 \begin{center}
       
   804 \bl{$s \in L(r)$}\\ 
       
   805 \end{center}
       
   806 \end{bubble}\bigskip\bigskip
       
   807 
       
   808 \ldots and the point of the next lecture is 
       
   809 to decide this problem as fast as possible (unlike Python,
       
   810 Ruby, Java)
       
   811 
       
   812 \end{frame}
       
   813 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   814 
       
   815 
       
   816 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   817 \begin{frame}[c]
       
   818 \frametitle{Written Exam}
       
   819 
       
   820 \begin{itemize}
       
   821 \item Accounts for 80\%.\bigskip
       
   822 
       
   823 \item The question ``\textit{Is this relevant for
       
   824       the exam?}'' is very demotivating for the lecturer!\bigskip\\
       
   825 
       
   826 \item Deal: Whatever is in the homework (and is not marked
       
   827       ``\textit{optional}'') is relevant for the exam.\bigskip
       
   828       
       
   829 \item Each lecture has also a handout. There are also handouts about
       
   830 notation and Scala.      
       
   831 \end{itemize}
       
   832 
       
   833 \end{frame}
       
   834 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   835 
       
   836 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   837 \begin{frame}[t]
       
   838 \frametitle{Coursework}
       
   839 
       
   840 \begin{itemize}
       
   841 \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
       
   842 \end{itemize}
       
   843 
       
   844 \begin{columns}[t]
       
   845 \begin{column}{.5\textwidth}
       
   846 \underline{\bf Strand 1}\medskip
       
   847 \begin{itemize}
       
   848 \item four programming tasks:
       
   849 \begin{itemize}
       
   850 \item matcher (4\%, 12.10.) 
       
   851 \item lexer (5\%, 02.11.)
       
   852 \item parser (5\%, 23.11.)
       
   853 \item compiler (6\%, 14.12.)
       
   854 \end{itemize}
       
   855 \item in any lang.~you like,\\ but I want to see the code
       
   856 \end{itemize}
       
   857 \end{column}
       
   858 
       
   859 \hspace{-45pt}\vrule{}\hspace{10pt}
       
   860 \begin{column}{.5\textwidth}
       
   861 \underline{\bf Strand 2}\smallskip\begin{itemize}
       
   862 \item one task: prove the correctness of a regular expression matcher in 
       
   863 the \underline{Isabelle} theorem prover
       
   864 \item 20\%, submission on~14.12.\hspace{-5mm}\mbox{}
       
   865 \end{itemize}
       
   866 \end{column}
       
   867 \end{columns}\medskip
       
   868 
       
   869 \small
       
   870 \begin{itemize}
       
   871 \item Solving more than one strand will {\bf not} give you more 
       
   872 marks.
       
   873 
       
   874 \end{itemize}
       
   875 
       
   876 \end{frame}
       
   877 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   878 
       
   879 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   880 \begin{frame}[c]
       
   881 \frametitle{Lecture Capture}
       
   882 
       
   883 \begin{itemize}
       
   884 \item Hope it works\ldots\pause actually no, it does not!\medskip\pause
       
   885 \item It is important to use lecture capture wisely\\ (it is only the ``baseline''):
       
   886 \begin{itemize}  
       
   887 \item Lecture recordings are a study and revision aid.
       
   888 \item Statistically, there is a clear and direct link between attendance and
       
   889   attainment: Students who do not attend lectures, do less well in exams.
       
   890 \end{itemize}
       
   891 
       
   892 \item Attending a lecture is more than watching it online -- if you do not
       
   893 attend, you miss out!  
       
   894   
       
   895 \end{itemize}
       
   896 
       
   897 \end{frame}
       
   898 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   899 
       
   900 
       
   901 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   902 \begin{frame}[c]
       
   903 \frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
       
   904 
       
   905 \mbox{}
       
   906 \end{frame}
       
   907 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   908 \end{document}
       
   909 
       
   910 %%% Local Variables:  
       
   911 %%% mode: latex
       
   912 %%% TeX-master: t
       
   913 %%% End: 
       
   914