slides01.tex
changeset 16 c51178fa85fe
parent 15 cd0ceaf89c1d
equal deleted inserted replaced
15:cd0ceaf89c1d 16:c51178fa85fe
     1 \documentclass[dvipsnames,14pt,t,xelatex]{beamer}
     1 \documentclass[dvipsnames,14pt,t,xelatex]{beamer}
     2 \usepackage{./slides}
     2 \usepackage{./slides}
     3 \usepackage{./graph}
     3 \usepackage{./graph}
     4 \usepackage{./langs}
     4 \usepackage{./langs}
     5 \usepackage{./data}
     5 \usepackage{./data}
     6 
     6 \usepackage{changepage}
     7 \hfuzz=220pt 
     7 \hfuzz=220pt 
     8 
     8 
     9 \lstset{language=Scala,
     9 \lstset{language=Scala,
    10         style=mystyle,
    10         style=mystyle,
    11         numbersep=0pt,
    11         numbersep=0pt,
    80 46000 21.984721
    80 46000 21.984721
    81 50000 26.950203
    81 50000 26.950203
    82 60000 43.0327746
    82 60000 43.0327746
    83 \end{filecontents}
    83 \end{filecontents}
    84 
    84 
    85 
    85 \begin{filecontents}{re-usize.data}
       
    86 1  16
       
    87 2  33
       
    88 3  63
       
    89 4  108
       
    90 5  181
       
    91 6  297
       
    92 7  484
       
    93 8  785
       
    94 9  1271
       
    95 10  2056
       
    96 11  3325
       
    97 12  5377
       
    98 13  8696
       
    99 14  14065
       
   100 15  22751
       
   101 16  36804
       
   102 17  59541
       
   103 18  96329
       
   104 19  155852
       
   105 20  252161
       
   106 21  407991
       
   107 22  660128
       
   108 23  1068093
       
   109 24  1728193
       
   110 25  2796256
       
   111 26  4524417
       
   112 27  7320639
       
   113 \end{filecontents}
       
   114 
       
   115 \begin{filecontents}{re-ssize.data}
       
   116 1  12
       
   117 2  19
       
   118 3  19
       
   119 4  19
       
   120 5  19
       
   121 6  19
       
   122 7  19
       
   123 8  19
       
   124 9  19
       
   125 10  19
       
   126 11  19
       
   127 12  19
       
   128 13  19
       
   129 14  19
       
   130 15  19
       
   131 16  19
       
   132 17  19
       
   133 18  19
       
   134 19  19
       
   135 20  19
       
   136 21  19
       
   137 22  19
       
   138 23  19
       
   139 24  19
       
   140 25  19
       
   141 26  19
       
   142 27  19
       
   143 28  19
       
   144 29  19
       
   145 30  19
       
   146 31  19
       
   147 32  19
       
   148 33  19
       
   149 34  19
       
   150 35  19
       
   151 36  19
       
   152 37  19
       
   153 38  19
       
   154 39  19
       
   155 40  19
       
   156 \end{filecontents}
    86 
   157 
    87 \begin{document}
   158 \begin{document}
    88 
   159 
    89 
   160 
    90 
   161 
    94 
   165 
    95 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   166 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    96 \begin{frame}[t]
   167 \begin{frame}[t]
    97 \frametitle{%  
   168 \frametitle{%  
    98   \begin{tabular}{@ {}c@ {}}
   169   \begin{tabular}{@ {}c@ {}}
    99   \\[-3mm]
   170   \\[-1mm]
   100   \LARGE Compilers and \\[-1mm] 
   171   \LARGE Fast Regular Expression  \\[-1mm] 
   101   \LARGE Formal Languages (1)\\[-3mm] 
   172   \LARGE Matching Using Derivatives\\[-3mm] 
   102   \end{tabular}}
   173   \end{tabular}}
   103 
   174 
   104   \begin{center}
   175   \begin{center}
   105   %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
   176   %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
   106   %\includegraphics[scale=0.31]{pics/ante2.jpg}\\
   177   %\includegraphics[scale=0.31]{pics/ante2.jpg}\\
   108   \end{center}
   179   \end{center}
   109 
   180 
   110   \normalsize
   181   \normalsize
   111   \begin{center}
   182   \begin{center}
   112   \begin{tabular}{ll}
   183   \begin{tabular}{ll}
   113   Email:  & christian.urban at kcl.ac.uk\\
   184   %Email:  & christian.urban at kcl.ac.uk\\
   114   Office: & N\liningnums{7.07} (North Wing, Bush House)\\
   185   %Office: & N\liningnums{7.07} (North Wing, Bush House)\\
   115   Slides: & KEATS
   186   %Slides: & KEATS
       
   187   Student: & Chengsong Tan\\
       
   188   Supervisor: & Christian Urban \\
       
   189   Date: & 2019/5/10
   116   \end{tabular}
   190   \end{tabular}
   117   \end{center}
   191   \end{center}
   118 
   192 
   119 \end{frame}
   193 \end{frame}
   120 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   194 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   121 
   195 
   122 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   123 \begin{frame}[t]
       
   124 \frametitle{Why Study Compilers?}
       
   125 
       
   126 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\
       
   127 
       
   128 \begin{bubble}[10.5cm]
       
   129   \bf ``\ldots{}It’s effectively a perpetual
       
   130   employment act for solid compiler hackers.''
       
   131 \end{bubble}
       
   132 
       
   133 \onslide<1->{
       
   134 \only<2>{
       
   135 \begin{itemize}
       
   136 \item {\bf Hardware is getting weirder
       
   137   rather than getting clocked faster}
       
   138 
       
   139 \begin{itemize}
       
   140 \item  Almost all processors are
       
   141   multicores nowadays and it looks like there is increasing asymmetry in
       
   142   resources across cores. Processors come with vector units, crypto
       
   143   accelerators etc. We have DSPs, GPUs,
       
   144   ARM big.little, and Xeon Phi. This is only scratching the
       
   145   surface.
       
   146 \end{itemize}  
       
   147 \end{itemize}}
       
   148 \only<3>{
       
   149 \begin{itemize}
       
   150 \item {\bf We’re getting tired of low-level languages and
       
   151     their associated security disasters}
       
   152   
       
   153 \begin{itemize}
       
   154 \item 
       
   155   We want to write new code, to
       
   156   whatever extent possible, in safer, higher-level
       
   157   languages. Compilers are caught right in the middle of these
       
   158   opposing trends: one of their main jobs is to help bridge the large
       
   159   and growing gap between increasingly high-level languages and
       
   160   increasingly wacky platforms.
       
   161 \end{itemize}  
       
   162 \end{itemize}}}
       
   163 
       
   164 \end{frame}
       
   165 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   166 
       
   167 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   168 \begin{frame}[c]
       
   169 \frametitle{Why Bother?}
       
   170 
       
   171 \begin{columns}[t]
       
   172 \begin{column}{.5\textwidth}
       
   173 Ruby, Python, Java 8\medskip\\
       
   174 \begin{tikzpicture}\footnotesize
       
   175 \begin{axis}[
       
   176     xlabel={$n$},
       
   177     x label style={at={(1.05,0.0)}},
       
   178     ylabel={time in secs},
       
   179     enlargelimits=false,
       
   180     xtick={0,5,...,30},
       
   181     xmax=33,
       
   182     ymax=35,
       
   183     ytick={0,5,...,30},
       
   184     scaled ticks=false,
       
   185     axis lines=left,
       
   186     width=5.5cm,
       
   187     height=4cm, 
       
   188     legend entries={Python,Ruby},  
       
   189     legend pos=north west,
       
   190     legend cell align=left]
       
   191 \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
       
   192 \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
       
   193 \end{axis}
       
   194 \end{tikzpicture}
       
   195 \begin{tikzpicture}\footnotesize
       
   196 \begin{axis}[
       
   197     xlabel={$n$},
       
   198     x label style={at={(1.05,0.0)}},
       
   199     ylabel={time in secs},
       
   200     enlargelimits=false,
       
   201     xtick={0,5,...,30},
       
   202     xmax=33,
       
   203     ymax=35,
       
   204     ytick={0,5,...,30},
       
   205     scaled ticks=false,
       
   206     axis lines=left,
       
   207     width=5.5cm,
       
   208     height=4cm, 
       
   209     legend entries={Python, Java 8},  
       
   210     legend pos=north west,
       
   211     legend cell align=left]
       
   212 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};   
       
   213 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   214 \end{axis}
       
   215 \end{tikzpicture}
       
   216 
       
   217 \end{column}
       
   218 \begin{column}{.5\textwidth}
       
   219 Us (after next lecture)\medskip\\
       
   220 \begin{tikzpicture}\footnotesize
       
   221 \begin{axis}[
       
   222     xlabel={$n$},
       
   223     x label style={at={(1.07,0.0)}},
       
   224     ylabel={time in secs},
       
   225     enlargelimits=false,
       
   226     xtick={0,5000,...,10000},
       
   227     xmax=11000,
       
   228     ymax=35,
       
   229     ytick={0,5,...,30},
       
   230     scaled ticks=false,
       
   231     axis lines=left,
       
   232     width=5.5cm,
       
   233     height=4cm]
       
   234 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   235 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
       
   236 \end{axis}
       
   237 \end{tikzpicture}
       
   238 \begin{tikzpicture}\footnotesize
       
   239 \begin{axis}[
       
   240     xlabel={$n$},
       
   241     x label style={at={(1.07,0.0)}},
       
   242     ylabel={time in secs},
       
   243     enlargelimits=false,
       
   244     ymax=35,
       
   245     ytick={0,5,...,30},
       
   246     scaled ticks=false,
       
   247     axis lines=left,
       
   248     width=5.5cm,
       
   249     height=4cm]
       
   250 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
       
   251 \end{axis}
       
   252 \end{tikzpicture}
       
   253 \end{column}
       
   254 \end{columns}\bigskip
       
   255 
       
   256 \small\centering
       
   257 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
       
   258 against $\underbrace{\texttt{a}...\texttt{a}}_n$
       
   259 \end{frame} 
       
   260 
       
   261 \begin{frame}
       
   262 \begin{column}{.5\textwidth}
       
   263 \begin{tikzpicture}
       
   264 \begin{axis}[
       
   265     xlabel={$n$},
       
   266     x label style={at={(1.05,0.0)}},
       
   267     ylabel={time in secs},
       
   268     y label style={at={(0.06,0.5)}},
       
   269     enlargelimits=false,
       
   270     xtick={0,5,...,30},
       
   271     xmax=33,
       
   272     ymax=45,
       
   273     ytick={0,10,...,40},
       
   274     scaled ticks=false,
       
   275     axis lines=left,
       
   276     width=6cm,
       
   277     height=4.5cm, 
       
   278     legend entries={Python, Java 8},  
       
   279     legend pos=north west]
       
   280 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
       
   281 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   282 \end{axis}
       
   283 \end{tikzpicture}
       
   284 \begin{tikzpicture}
       
   285 \begin{axis}[
       
   286     xlabel={$n$},
       
   287     x label style={at={(1.05,0.0)}},
       
   288     ylabel={time in secs},
       
   289     y label style={at={(0.06,0.5)}},
       
   290     %enlargelimits=false,
       
   291     %xtick={0,5000,...,30000},
       
   292     xmax=65000,
       
   293     ymax=45,
       
   294     ytick={0,10,...,40},
       
   295     scaled ticks=false,
       
   296     axis lines=left,
       
   297     width=6cm,
       
   298     height=4.5cm, 
       
   299     legend entries={Java 9},  
       
   300     legend pos=north west]
       
   301 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
       
   302 \end{axis}
       
   303 \end{tikzpicture}
       
   304 \end{column}
       
   305 \end{frame}
       
   306 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   307 
       
   308 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   309 \begin{frame}[c]
       
   310 \frametitle{Evil Regular Expressions}
       
   311 
       
   312 \begin{itemize}
       
   313 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
       
   314 \item Evil regular expressions\medskip
       
   315 \begin{itemize}
       
   316 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
       
   317 \item \bl{$(a^*)^*\cdot b$}
       
   318 \item \bl{$([a$\,-\,$z]^+)^*$}
       
   319 \item \bl{$(a + a \cdot a)^*$}
       
   320 \item \bl{$(a + a^?)^*$}
       
   321 \end{itemize}
       
   322 
       
   323 \item sometimes also called \alert{catastrophic backtracking}
       
   324 \item this is a problem for \alert{N}etwork \alert{I}ntrusion
       
   325   \alert{D}etection systems, StackExchange, Atom editor
       
   326 \item \url{https://vimeo.com/112065252}  
       
   327 \end{itemize}
       
   328 
       
   329 \end{frame}
       
   330 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   331 
       
   332 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   333 \begin{frame}[c]
       
   334 \frametitle{The Goal of this Module}
       
   335 
       
   336 \begin{center}
       
   337   \begin{tikzpicture}[scale=1,
       
   338                       node/.style={
       
   339                       rectangle,rounded corners=3mm,
       
   340                       very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
       
   341                       top color=white,bottom color=black!20}]
       
   342 
       
   343   \node at (3.05, 1.8) {\Large\bf write a compiler};
       
   344 
       
   345   \node (0) at (-2.3,0) {};  
       
   346   \node [above=5mm of 0]
       
   347   {\makebox[0mm]{\footnotesize
       
   348       \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; 
       
   349      
       
   350   \node (A) at (0,0)  [node] {};
       
   351   \node [below right] at (A.north west) {lexer};
       
   352 
       
   353   \node (B) at (3,0)  [node] {};
       
   354   \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
       
   355 
       
   356   \node (C) at (6,0)  [node] {};
       
   357   \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
       
   358 
       
   359   \node (1) at (8.4,0) {};
       
   360   \node [above=5mm of 1]
       
   361   {\makebox[0mm]{\footnotesize
       
   362       \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
       
   363 
       
   364   \draw [->,line width=4mm] (0) -- (A); 
       
   365   \draw [->,line width=4mm] (A) -- (B); 
       
   366   \draw [->,line width=4mm] (B) -- (C); 
       
   367   \draw [->,line width=4mm] (C) -- (1); 
       
   368   \end{tikzpicture}
       
   369   \end{center}
       
   370 
       
   371 \only<2,3,4>{
       
   372 \begin{textblock}{1}(1,2.1)
       
   373 \begin{bubble}[9.8cm]
       
   374 \normalsize
       
   375 lexer input: a string\smallskip\\
       
   376 \hspace{5mm}\code{"read(n);"}\medskip\\
       
   377 lexer output: a sequence of tokens\smallskip\\
       
   378 \hspace{5mm}\code{key(read) lpar id(n) rpar semi}
       
   379 \end{bubble}
       
   380 \end{textblock}}
       
   381 
       
   382 \only<3,4>{
       
   383 \begin{textblock}{1}(6,7.8)
       
   384 \begin{tabular}{c}
       
   385 \includegraphics[scale=0.2]{rosetta.jpg}\\[-2mm]
       
   386 \footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
       
   387 \end{tabular}
       
   388 \end{textblock}}
       
   389 
       
   390 \only<4>{
       
   391 \begin{textblock}{1}(0.5,12)\small
       
   392 \begin{tabular}{l@{}c@{}l}
       
   393   \pcode{if}    & $\;\Rightarrow\;$ & keyword\\
       
   394   \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
       
   395 \end{tabular}  
       
   396 \end{textblock}}
       
   397 
       
   398 \only<5>{
       
   399 \begin{textblock}{1}(1,1.5)
       
   400 \begin{bubble}[8.5cm]
       
   401 \normalsize
       
   402 parser input: a sequence of tokens\smallskip\\
       
   403 
       
   404 {\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\
       
   405 
       
   406 parser output: an abstract syntax tree\smallskip\\
       
   407 \footnotesize
       
   408 \hspace{2cm}\begin{tikzpicture}
       
   409   \node {\code{read}}
       
   410     child {node {\code{lpar}}}
       
   411     child {node {\code{n}}}
       
   412     child {node {\code{rpar}}};
       
   413 \end{tikzpicture}
       
   414 \end{bubble}
       
   415 \end{textblock}}
       
   416 
       
   417 \only<6,7>{
       
   418 \begin{textblock}{1}(1,1.5)
       
   419 \begin{bubble}[4cm]
       
   420 \normalsize
       
   421 code generator:\smallskip\\
       
   422 \hspace{5mm}\code{istore 2}\\ 
       
   423 \hspace{5mm}\code{iload 2}\\ 
       
   424 \hspace{5mm}\code{ldc 10}\\
       
   425 \hspace{5mm}\code{isub}\\
       
   426 \hspace{5mm}\code{ifeq Label2}\\ 
       
   427 \hspace{5mm}\code{iload 2}\\
       
   428 \hspace{5mm}\code{...}\\
       
   429 \end{bubble}
       
   430 \end{textblock}}
       
   431 
       
   432 \only<7>{
       
   433 \begin{textblock}{6}(8.4,7)
       
   434 \begin{bubble}[5cm]
       
   435 \mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
       
   436 \begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
       
   437     xlabel=n,
       
   438     enlargelimits=0.05,
       
   439     ybar interval=0.7, legend style=small]
       
   440 \addplot file {interpreted2.data};
       
   441 \addplot file {compiled2.data};
       
   442 %\legend{interpreted, compiled}
       
   443 \end{axis}
       
   444 \end{tikzpicture}}
       
   445 \end{bubble}
       
   446 \end{textblock}}
       
   447 
       
   448 \end{frame}
       
   449 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
       
   450 
       
   451 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   452 \begin{frame}[c]
       
   453 \frametitle{The Acad.~Subject is Mature}
       
   454 
       
   455 \begin{itemize}
       
   456 \item Turing Machines, 1936
       
   457 \item Regular Expressions, 1956\\
       
   458 \item The first compiler for COBOL, 1957\\ (Grace Hopper)
       
   459 \item But surprisingly research papers are still published nowadays\\
       
   460 \item ``Parsing: The Solved Problem That Isn't''
       
   461 \end{itemize}
       
   462 
       
   463 \begin{flushright}
       
   464 \includegraphics[scale=0.3]{hopper.jpg}\\
       
   465 \footnotesize\textcolor{gray}{Grace Hopper}
       
   466 \end{flushright}
       
   467 
       
   468 
       
   469 \begin{flushright}
       
   470 \mbox{}\\[-6mm]
       
   471 {\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show,\\[-2mm]
       
   472  \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}}
       
   473 \end{flushright}
       
   474 
       
   475 \end{frame}
       
   476 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%     
       
   477 
       
   478 
       
   479 
       
   480 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   481 \begin{frame}[c]
       
   482 \frametitle{Lectures 1 - 5}
       
   483 
       
   484 transforming strings into structured data\\[10mm]
       
   485 
       
   486 {\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
       
   487 \hspace{5mm}(recognising ``words'')\\[6mm]
       
   488 
       
   489 {\LARGE\bf Parsing}\medskip\\
       
   490 \hspace{5mm}(recognising ``sentences'')
       
   491 
       
   492 \begin{textblock}{1}(10,9.1)
       
   493 \begin{tabular}{c}
       
   494 \includegraphics[scale=0.1]{rosetta.jpg}\\[-2mm]
       
   495 \footnotesize Stone of Rosetta
       
   496 \end{tabular}
       
   497 \end{textblock}
       
   498 
       
   499 \end{frame}
       
   500 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   501 
       
   502 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   503 \begin{frame}[t]
       
   504 \frametitle{Familiar Regular Expr.}
       
   505 \small
       
   506 \begin{center}
       
   507 \texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
       
   508 \end{center}\smallskip
       
   509 
       
   510 \begin{center}
       
   511 \begin{tabular}{@{}lp{8.5cm}@{}}
       
   512 \pcode{re*} & matches 0 or more times\\
       
   513 \pcode{re+} & matches 1 or more times\\
       
   514 \pcode{re?} & matches 0 or 1 times\\
       
   515 \pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
       
   516 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
       
   517 \pcode{[...]} & matches any single character inside the brackets\\
       
   518 \pcode{[^...]} & matches any single character not inside the 
       
   519 brackets\\
       
   520 \pcode{a-z A-Z} & character ranges\\
       
   521 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
       
   522 \pcode{.} & matches every character except newline\\
       
   523 \pcode{(re)}	& groups regular expressions and remembers 
       
   524 the matched text
       
   525 \end{tabular}
       
   526 \end{center}
       
   527 
       
   528 
       
   529 \end{frame}
       
   530 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   531 
       
   532 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   533 \begin{frame}[c]
       
   534 \frametitle{Today}
       
   535 
       
   536 \begin{itemize}
       
   537 \item While the ultimate goal is to implement a small compiler for the JVM
       
   538   \ldots\bigskip
       
   539 \end{itemize}
       
   540 
       
   541 Let's start with:
       
   542 
       
   543 \begin{itemize}
       
   544 \item a web-crawler
       
   545 \item an email harvester
       
   546 %\item \textcolor{gray}{(a web-scraper)}
       
   547 \end{itemize}
       
   548 
       
   549 \end{frame}
       
   550 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   551 
       
   552 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   553 \begin{frame}[t]
       
   554 \frametitle{A Web-Crawler}
       
   555 
       
   556 \mbox{}\\[10mm]
       
   557 
       
   558 \begin{enumerate}
       
   559 \item given an URL, read the corresponding webpage
       
   560 \item extract all links from it
       
   561 \item call the web-crawler again for all these links
       
   562 \end{enumerate}
       
   563 
       
   564 \end{frame}
       
   565 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   566 
       
   567 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   568 \begin{frame}[t]
       
   569 \frametitle{A Web-Crawler}
       
   570 
       
   571 \mbox{}\\[10mm]
       
   572 
       
   573 
       
   574 \begin{enumerate}
       
   575 \item given an URL, read the corresponding webpage
       
   576 \item if not possible print, out a problem
       
   577 \item if possible, extract all links from it
       
   578 \item call the web-crawler again for all these links
       
   579 \end{enumerate}\bigskip\pause
       
   580 
       
   581 \small (we need a bound for the number of recursive calls)
       
   582 
       
   583 \small (the purpose is to check all links on my own webpage)
       
   584 \end{frame}
       
   585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   586 
       
   587 
       
   588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   589 \begin{frame}[c]
       
   590 
       
   591 \begin{textblock}{1}(2,5)
       
   592 \begin{tabular}{c}
       
   593 \includegraphics[scale=0.15]{servers.png}\\[-2mm]
       
   594 \small Server
       
   595 \end{tabular}
       
   596 \end{textblock}
       
   597 
       
   598 \begin{textblock}{1}(5.6,4)
       
   599   \begin{tikzpicture}[scale=1.1]
       
   600   \draw[white] (0,1) node (X) {};
       
   601   \draw[white] (2,1) node (Y) {};
       
   602    \draw[white] (0,0) node (X1) {};
       
   603   \draw[white] (2,0) node (Y1) {};
       
   604    \draw[white] (0,-1) node (X2) {};
       
   605   \draw[white] (2,-1) node (Y2) {};
       
   606   \draw[red, <-, line width = 2mm] (X) -- (Y);
       
   607   \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
       
   608   \draw[red, ->, line width = 2mm] (X1) -- (Y1);
       
   609   \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
       
   610   \draw[red, <-, line width = 2mm] (X2) -- (Y2);
       
   611   \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
       
   612   \end{tikzpicture}
       
   613 \end{textblock}
       
   614 
       
   615 
       
   616 \begin{textblock}{1}(9,5.5)
       
   617 \begin{tabular}{c}
       
   618 \includegraphics[scale=0.15]{laptop.png}\\[-2mm]
       
   619 \small Browser
       
   620 \end{tabular}
       
   621 \end{textblock}
       
   622 \end{frame}
       
   623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   624 
       
   625   
       
   626 
       
   627  
       
   628 
       
   629 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   630 \begin{frame}[t]
       
   631 \frametitle{A Regular Expression}
       
   632 
       
   633 \begin{itemize}
       
   634 \item \ldots{} is a pattern or template for specifying strings
       
   635 \end{itemize}\bigskip
       
   636   
       
   637 \begin{center}  
       
   638 \only<1>{\scode{"https?://[^"]*"}}%
       
   639 \only<2>{\scode{""""https?://[^"]*"""".r}}
       
   640 \end{center}\bigskip\bigskip
       
   641 
       
   642 matches for example\smallskip\\  
       
   643 \hspace{2mm}\code{"http://www.foobar.com"}\\
       
   644 \hspace{2mm}\code{"https://www.tls.org"}\smallskip\\
       
   645 
       
   646 but not\smallskip\\  
       
   647 \hspace{2mm}\code{"http://www."foo"bar.com"}\\
       
   648 
       
   649 \end{frame}
       
   650 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   651 
       
   652 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   653 \begin{frame}[c]
       
   654 \frametitle{Finding Operations in Scala}
       
   655 
       
   656 {\bf\code{rexp.findAllIn(string)}}\medskip
       
   657   
       
   658 returns a list of all (sub)strings that match the 
       
   659 regular expression
       
   660 \bigskip\bigskip  
       
   661   
       
   662 
       
   663 {\bf\code{rexp.findFirstIn(string)}}\medskip
       
   664  
       
   665 returns either 
       
   666 
       
   667 \begin{itemize}
       
   668 \item \code{None} if no (sub)string matches or 
       
   669 \item \code{Some(s)} with the first (sub)string
       
   670 \end{itemize}
       
   671 
       
   672 \end{frame}
       
   673 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   674 
       
   675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   676 \begin{frame}[t]
   196 \begin{frame}[t]
   677 \frametitle{Regular Expressions}
   197 \frametitle{Regular Expressions}
   678 
   198 
   679 Their inductive definition:
   199 Their inductive definition:\\
   680 
   200 \hspace{10pt}
   681 
   201 \vspace{100pt}
   682 \begin{textblock}{6}(2,7.5)
   202 
       
   203 \begin{textblock}{6}(1.5,3.5)
   683   \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
   204   \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
   684   \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
   205   \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
   685          & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} / $[]$\\
   206          & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} / $[]$\\
   686          & \bl{$\mid$} & \bl{$c$}                         & character\\
   207          & \bl{$\mid$} & \bl{$c$}                         & character\\
   687          & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
   208          & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
   688          & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
   209          & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
   689          & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
   210          & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
   690   \end{tabular}
   211   \end{tabular}
   691   \end{textblock}
   212   \end{textblock}
   692   
   213   
       
   214  \begin{itemize}
       
   215 \item { Formalized by Stephen Coles Kleene in 1950s} 
       
   216 \item {\bf What could be possibly interesting?} 
       
   217  \end{itemize} 
       
   218 \end{frame}
       
   219 
       
   220 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   221 \iffalse
       
   222 \begin{frame}[t]
       
   223 \frametitle{Regular Expressions}
       
   224 
       
   225 John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}\smallskip\\
       
   226 
       
   227 \begin{bubble}[10.5cm]
       
   228   \bf ``\ldots{}It’s effectively a perpetual
       
   229   employment act for solid compiler hackers.''
       
   230 \end{bubble}
       
   231 
       
   232 \onslide<1->{
       
   233 \only<2>{
       
   234 \begin{itemize}
       
   235 \item {\bf Hardware is getting weirder
       
   236   rather than getting clocked faster}
       
   237 
       
   238 \begin{itemize}
       
   239 \item  Almost all processors are
       
   240   multicores nowadays and it looks like there is increasing asymmetry in
       
   241   resources across cores. Processors come with vector units, crypto
       
   242   accelerators etc. We have DSPs, GPUs,
       
   243   ARM big.little, and Xeon Phi. This is only scratching the
       
   244   surface.
       
   245 \end{itemize}  
       
   246 \end{itemize}}
       
   247 \only<3>{
       
   248 \begin{itemize}
       
   249 \item {\bf We’re getting tired of low-level languages and
       
   250     their associated security disasters}
   693   
   251   
   694   
   252 \begin{itemize}
   695 \end{frame}
   253 \item 
   696 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   254   We want to write new code, to
   697 
   255   whatever extent possible, in safer, higher-level
   698 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   256   languages. Compilers are caught right in the middle of these
   699 %\begin{frame}[t]
   257   opposing trends: one of their main jobs is to help bridge the large
   700 %\frametitle{Regular Expressions}
   258   and growing gap between increasingly high-level languages and
   701 %
   259   increasingly wacky platforms.
   702 %\small
   260 \end{itemize}  
   703 %In Scala:\bigskip
   261 \end{itemize}}}
   704 %
   262 
   705 %\footnotesize
   263 \end{frame}
   706 %\lstinputlisting{../progs/app51.scala}
   264 \fi
   707 %
   265 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   708 %  
   266 
   709 %\end{frame}
   267 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   710 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   268 \iffalse
   711 
       
   712 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   713 \begin{frame}[t]
       
   714 \frametitle{Strings}
       
   715 
       
   716 \ldots are lists of characters. For example \code{"hello"}
       
   717 
       
   718 \begin{center}
       
   719 \bl{$[h, e, l, l, o]$} or just \bl{$hello$}
       
   720 \end{center}
       
   721 
       
   722 the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\
       
   723 
       
   724 the concatenation of two strings:
       
   725 
       
   726 \begin{center}
       
   727 \bl{$s_1 \,@\, s_2$}
       
   728 \end{center}
       
   729 
       
   730 \bl{\textit{foo $@$ bar = foobar}, \textit{baz $@\, []$ = baz}}
       
   731   
       
   732 \end{frame}
       
   733 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   734 
       
   735 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   736 \begin{frame}[c]
   269 \begin{frame}[c]
   737 \frametitle{Languages, Strings}
   270 \frametitle{Why Bother?}
   738 
       
   739 \begin{itemize}
       
   740 \item \alert{\bf Strings} are lists of characters, for example
       
   741 \begin{center}
       
   742 \bl{$[]$},\;\bl{$abc$}  \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
       
   743 \end{center}\bigskip
       
   744 
       
   745 
       
   746 \item A \alert{\bf language} is a set of strings, for example\medskip
       
   747 \begin{center}
       
   748 \bl{$\{[], hello, \textit{foobar}, a, abc\}$}
       
   749 \end{center}\bigskip
       
   750 
       
   751 \item \alert{\bf Concatenation} of strings and languages
       
   752 
       
   753 \begin{center}
       
   754 \begin{tabular}{rcl}
       
   755 \bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
       
   756 \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
       
   757 \end{tabular}
       
   758 \end{center}
       
   759 
       
   760 %\item The \alert{\bf meaning} of a regular expression is a set of 
       
   761 %  strings, or language.
       
   762 \end{itemize}  
       
   763 
       
   764 \end{frame}
       
   765 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   766 
       
   767 
       
   768 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   769 \mode<presentation>{
       
   770 \begin{frame}[c]
       
   771 \frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] 
       
   772   Regular Expression\end{tabular}}
       
   773 
       
   774 \begin{textblock}{15}(1,4)
       
   775  \begin{tabular}{rcl}
       
   776  \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
       
   777  \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
       
   778  \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
       
   779  \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
       
   780  \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
       
   781  \bl{$L(r^*)$}           & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
       
   782   \end{tabular}\bigskip
       
   783   
       
   784 \onslide<2->{
       
   785 \hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
       
   786 \bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
       
   787 \small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
       
   788 }  
       
   789     \end{textblock}
       
   790 
       
   791 \end{frame}}
       
   792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   793 
       
   794 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   795 \begin{frame}[c]
       
   796 \frametitle{The Meaning of Matching}
       
   797 
       
   798 \begin{bubble}[10cm]
       
   799 \large\bf 
       
   800 A regular expression \bl{$r$} matches a string~\bl{$s$} 
       
   801 provided
       
   802 
       
   803 \begin{center}
       
   804 \bl{$s \in L(r)$}\\ 
       
   805 \end{center}
       
   806 \end{bubble}\bigskip\bigskip
       
   807 
       
   808 \ldots and the point of the next lecture is 
       
   809 to decide this problem as fast as possible (unlike Python,
       
   810 Ruby, Java)
       
   811 
       
   812 \end{frame}
       
   813 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   814 
       
   815 
       
   816 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   817 \begin{frame}[c]
       
   818 \frametitle{Written Exam}
       
   819 
       
   820 \begin{itemize}
       
   821 \item Accounts for 80\%.\bigskip
       
   822 
       
   823 \item The question ``\textit{Is this relevant for
       
   824       the exam?}'' is very demotivating for the lecturer!\bigskip\\
       
   825 
       
   826 \item Deal: Whatever is in the homework (and is not marked
       
   827       ``\textit{optional}'') is relevant for the exam.\bigskip
       
   828       
       
   829 \item Each lecture has also a handout. There are also handouts about
       
   830 notation and Scala.      
       
   831 \end{itemize}
       
   832 
       
   833 \end{frame}
       
   834 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
       
   835 
       
   836 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   837 \begin{frame}[t]
       
   838 \frametitle{Coursework}
       
   839 
       
   840 \begin{itemize}
       
   841 \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
       
   842 \end{itemize}
       
   843 
   271 
   844 \begin{columns}[t]
   272 \begin{columns}[t]
   845 \begin{column}{.5\textwidth}
   273 \begin{column}{.5\textwidth}
   846 \underline{\bf Strand 1}\medskip
   274 Ruby, Python, Java 8\medskip\\
   847 \begin{itemize}
   275 \begin{tikzpicture}\footnotesize
   848 \item four programming tasks:
   276 \begin{axis}[
   849 \begin{itemize}
   277     xlabel={$n$},
   850 \item matcher (4\%, 12.10.) 
   278     x label style={at={(1.05,0.0)}},
   851 \item lexer (5\%, 02.11.)
   279     ylabel={time in secs},
   852 \item parser (5\%, 23.11.)
   280     enlargelimits=false,
   853 \item compiler (6\%, 14.12.)
   281     xtick={0,5,...,30},
   854 \end{itemize}
   282     xmax=33,
   855 \item in any lang.~you like,\\ but I want to see the code
   283     ymax=35,
   856 \end{itemize}
   284     ytick={0,5,...,30},
       
   285     scaled ticks=false,
       
   286     axis lines=left,
       
   287     width=5.5cm,
       
   288     height=4cm, 
       
   289     legend entries={Python,Ruby},  
       
   290     legend pos=north west,
       
   291     legend cell align=left]
       
   292 \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
       
   293 \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
       
   294 \end{axis}
       
   295 \end{tikzpicture}
       
   296 \begin{tikzpicture}\footnotesize
       
   297 \begin{axis}[
       
   298     xlabel={$n$},
       
   299     x label style={at={(1.05,0.0)}},
       
   300     ylabel={time in secs},
       
   301     enlargelimits=false,
       
   302     xtick={0,5,...,30},
       
   303     xmax=33,
       
   304     ymax=35,
       
   305     ytick={0,5,...,30},
       
   306     scaled ticks=false,
       
   307     axis lines=left,
       
   308     width=5.5cm,
       
   309     height=4cm, 
       
   310     legend entries={Python, Java 8},  
       
   311     legend pos=north west,
       
   312     legend cell align=left]
       
   313 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};   
       
   314 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   315 \end{axis}
       
   316 \end{tikzpicture}
       
   317 
   857 \end{column}
   318 \end{column}
   858 
       
   859 \hspace{-45pt}\vrule{}\hspace{10pt}
       
   860 \begin{column}{.5\textwidth}
   319 \begin{column}{.5\textwidth}
   861 \underline{\bf Strand 2}\smallskip\begin{itemize}
   320 Us (after next lecture)\medskip\\
   862 \item one task: prove the correctness of a regular expression matcher in 
   321 \begin{tikzpicture}\footnotesize
   863 the \underline{Isabelle} theorem prover
   322 \begin{axis}[
   864 \item 20\%, submission on~14.12.\hspace{-5mm}\mbox{}
   323     xlabel={$n$},
   865 \end{itemize}
   324     x label style={at={(1.07,0.0)}},
       
   325     ylabel={time in secs},
       
   326     enlargelimits=false,
       
   327     xtick={0,5000,...,10000},
       
   328     xmax=11000,
       
   329     ymax=35,
       
   330     ytick={0,5,...,30},
       
   331     scaled ticks=false,
       
   332     axis lines=left,
       
   333     width=5.5cm,
       
   334     height=4cm]
       
   335 \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   336 \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
       
   337 \end{axis}
       
   338 \end{tikzpicture}
       
   339 \begin{tikzpicture}\footnotesize
       
   340 \begin{axis}[
       
   341     xlabel={$n$},
       
   342     x label style={at={(1.07,0.0)}},
       
   343     ylabel={time in secs},
       
   344     enlargelimits=false,
       
   345     ymax=35,
       
   346     ytick={0,5,...,30},
       
   347     scaled ticks=false,
       
   348     axis lines=left,
       
   349     width=5.5cm,
       
   350     height=4cm]
       
   351 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
       
   352 \end{axis}
       
   353 \end{tikzpicture}
   866 \end{column}
   354 \end{column}
   867 \end{columns}\medskip
   355 \end{columns}\bigskip
   868 
   356 
   869 \small
   357 \small\centering
   870 \begin{itemize}
   358 matching \texttt{[a?]\{n\}[a]\{n\}} and \texttt{(a*)*b}
   871 \item Solving more than one strand will {\bf not} give you more 
   359 against $\underbrace{\texttt{a}...\texttt{a}}_n$
   872 marks.
   360 \end{frame} 
   873 
   361 \fi
   874 \end{itemize}
   362 
   875 
   363 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
   876 \end{frame}
   364 
   877 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   365 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   878 
   366 
   879 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   880 \begin{frame}[c]
   367 \begin{frame}[c]
   881 \frametitle{Lecture Capture}
   368 \frametitle{Catastrophic Backtracking}
   882 
   369 
   883 \begin{itemize}
   370 \begin{columns}
   884 \item Hope it works\ldots\pause actually no, it does not!\medskip\pause
   371 \begin{column}{.5\textwidth}
   885 \item It is important to use lecture capture wisely\\ (it is only the ``baseline''):
   372 
   886 \begin{itemize}  
   373 \begin{tikzpicture}
   887 \item Lecture recordings are a study and revision aid.
   374 \begin{axis}[
   888 \item Statistically, there is a clear and direct link between attendance and
   375     xlabel={$n$},
   889   attainment: Students who do not attend lectures, do less well in exams.
   376     x label style={at={(1.07,0.0)}},
   890 \end{itemize}
   377     ylabel={time in secs},
   891 
   378     %y label style={at={(0.06,0.5)}},
   892 \item Attending a lecture is more than watching it online -- if you do not
   379     enlargelimits=false,
   893 attend, you miss out!  
   380     xtick={0,5,...,30},
   894   
   381     xmax=33,
   895 \end{itemize}
   382     ymax=45,
   896 
   383     ytick={0,10,...,40},
   897 \end{frame}
   384     scaled ticks=false,
   898 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
   385     axis lines=left,
       
   386     width=5cm,
       
   387     height=4cm, 
       
   388     legend entries={Python, Java 8},  
       
   389     legend pos=north west]
       
   390 \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
       
   391 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
       
   392 \end{axis}
       
   393 \end{tikzpicture}
       
   394 
       
   395 \begin{tikzpicture}
       
   396 \begin{axis}[
       
   397     xlabel={$n$},
       
   398     x label style={at={(1.07,0.0)}},
       
   399     ylabel={time in secs},
       
   400     %y label style={at={(0.06,0.5)}},
       
   401     %enlargelimits=false,
       
   402     %xtick={0,5000,...,30000},
       
   403     xmax=65000,
       
   404     ymax=45,
       
   405     ytick={0,10,...,40},
       
   406     scaled ticks=false,
       
   407     axis lines=left,
       
   408     width=5cm,
       
   409     height=4cm, 
       
   410     legend entries={Java 9},  
       
   411     legend pos=north west]
       
   412 \addplot[cyan,mark=*, mark options={fill=white}] table {re-java9.data};
       
   413 \end{axis}
       
   414 \end{tikzpicture}
       
   415 \end{column}
       
   416 \end{columns}\bigskip
       
   417 \center
       
   418 matching  \texttt{(a*)*b}
       
   419 against $\underbrace{\texttt{a}...\texttt{a}}_n$
       
   420 \end{frame}
       
   421 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   422 
       
   423 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   424 \begin{frame}[fragile]
       
   425 \frametitle{Impact}
       
   426 
       
   427 \begin{itemize}
       
   428 \item Network Traffic Analysis
       
   429 \begin{itemize}
       
   430 \item Snort:  > 5 million downloads
       
   431 \item Bro:  > 10000 downloads
       
   432 \item thousands of regexes
       
   433 \item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
       
   434 
       
   435 \end{itemize}
       
   436 
       
   437 \begin{verbatim}
       
   438  Jan 2 00:53:19 talisker sshd: Received disconnect from 110.53.183.227: [preauth]
       
   439  Jan 2 00:58:31 talisker sshd: Received disconnect from 110.53.183.252: [preauth]
       
   440  Jan 2 01:01:28 talisker sshd: Received disconnect from 221.194.47.236: [preauth]
       
   441  Jan 2 01:03:59 talisker sshd: Received disconnect from 110.53.183.228: [preauth]
       
   442  Jan 2 01:06:53 talisker sshd: Received disconnect from 221.194.47.245: [preauth]
       
   443  ...
       
   444 \end{verbatim}
       
   445 
       
   446 
       
   447 \item Compilers: lexical analysis
       
   448 \end{itemize}
       
   449 
       
   450 \end{frame}
       
   451 
       
   452 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   453 
       
   454 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   455 \begin{frame}[c]
       
   456 \frametitle{Evil Regular Expressions}
       
   457 
       
   458 \begin{itemize}
       
   459 %\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
       
   460 \item Evil regular expressions\medskip
       
   461 \begin{itemize}
       
   462 \item \bl{$(a^{?\{n\}}) \cdot a^{\{n\}}$}
       
   463 \item \bl{$(a^*)^*\cdot b$}
       
   464 \item \bl{$([a$\,-\,$z]^+)^*$}
       
   465 \item \bl{$(a + a \cdot a)^*$}
       
   466 \item \bl{$(a + a^?)^*$}
       
   467 \item \bl{$\^(.*?,)\{11\}P$}
       
   468 \item \bl{$<html>.*?<head>.*?<title>.*?</title>.*?</head>.*?<body[^>]*>.*?</body>.*?</html>$}
       
   469 \end{itemize}
       
   470 
       
   471 \item Pearl Compatible Regular Expression(PCRE) \bl{$(r) \backslash 1$}
       
   472 \end{itemize}
       
   473 
       
   474 \end{frame}
       
   475 
       
   476 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
       
   477 
       
   478 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   479 
       
   480 \begin{frame}[c]
       
   481 \frametitle{A Concrete Example}
       
   482 \begin{adjustwidth*}{}{-1.2cm} 
       
   483 \resizebox{13cm}{4cm}{
       
   484 
       
   485     \begin{tikzpicture}[shorten >=1pt,node distance=2cm,on grid,auto] 
       
   486     \node[state,initial] (q_0) {$0$};
       
   487     \node[state] (q_1) [right = of q_0] {$1$};    
       
   488     \node[state] (q_2) [right=of q_1] {$2$};
       
   489     \node (q_dots1) [right = of q_2] {$\cdots$};
       
   490     \node [state] (q_n) [right = of q_dots1] {$n$};
       
   491     \node [state] (q_n1) [right = of q_n] {$n+1$};
       
   492     \node[state] (q_n2) [right=of q_n1] {$n+2$};
       
   493     \node[state,accepting] (q_n3) [right=of q_n2] {$n+3$};    
       
   494 
       
   495     \path[->]
       
   496     (q_0) edge [loop above] node {$*$} (q_0)
       
   497     (q_0) edge node {$a$} (q_1)    
       
   498     (q_1) edge node {$*$} (q_2)     
       
   499     (q_2) edge node {$*$} (q_dots1)   
       
   500     (q_dots1) edge node{$*$} (q_n)
       
   501     (q_n) edge node{$*$} (q_n1)
       
   502     (q_n1) edge node{$*$} (q_n2)
       
   503     (q_n2) edge node{$*$} (q_n3);
       
   504 
       
   505 \draw [decorate,decoration={brace,amplitude=10pt,mirror,raise=10pt},yshift=0pt]
       
   506 (q_2.south west) -- (q_n1.south east) node [black,midway,yshift = -2cm] {\footnotesize n states};
       
   507 %\draw [decorate,decoration={brace,amplitude=10pt,mirror,raise=4pt},yshift=0pt]
       
   508 %(3.5,0.65) -- (3.5,6.5) node [black,midway,xshift=0.8cm] {\footnotesize
       
   509 %$P_2$};
       
   510 \end{tikzpicture}   
       
   511 }
       
   512 \end{adjustwidth*}
       
   513 
       
   514 \begin{itemize}
       
   515 \item
       
   516 NFA accepting Regex $.*a.{n}bc$
       
   517 \item
       
   518 Matching the string aaaaaaaaaaaaaaa.....abc
       
   519 \end{itemize}
       
   520 \end{frame}
       
   521 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   522 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   523 \begin{frame}[c]
       
   524 \frametitle{Brzozowski Derivative}
       
   525 
       
   526 \begin{itemize}
       
   527 \item Brzozowski invented in his 1964 Phd thesis
       
   528 \item Intuition: Chopping off the first character
       
   529 
       
   530 \begin{center}
       
   531 \bl{$A \backslash c \dn \{ s \;|\;  c\!::\!s \in A\}$ } 
       
   532 \end{center}
       
   533 \item
       
   534 For \bl{$A = \{\textit{foo}, \textit{bar}, \textit{frak}\}$} then
       
   535 
       
   536 \begin{center}
       
   537 \bl{\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
       
   538 $A \backslash f $ & $=$ & $\{\textit{oo}, \textit{rak}\}$\\
       
   539 $A \backslash b $ & $=$ &  $\{\textit{ar}\}$\\  
       
   540 $A \backslash a $ & $=$ & $\{\}$\pause
       
   541 \end{tabular}}
       
   542 \end{center}
       
   543 \item
       
   544 \begin{tabular}{rp{4cm}}
       
   545 \bl {$r \,matches \,s=c_1c_2...c_n$}
       
   546 \bl{$\Leftrightarrow$}  \bl{$c_1c_2...c_n \in L(r)$}   \bl{$\Leftrightarrow$}\\
       
   547   \bl{$c_2...c_n \in L(r) \backslash c_1$}\\
       
   548 \bl{$\Leftrightarrow$}  \bl{$\cdots$}\\
       
   549 \bl{$\Leftrightarrow$}  \bl{$[] \in ((L(r)\backslash c_1) \backslash c_2)\cdots\backslash c_n$}
       
   550 \end{tabular}
       
   551 
       
   552 \end{itemize}
       
   553 
       
   554 \end{frame}
       
   555 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   556 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   557 
       
   558 \begin{frame}[c]
       
   559 \frametitle{Previous Example}
       
   560 
       
   561 \begin{center}
       
   562 \begin{tikzpicture}
       
   563   \begin{axis}[
       
   564     xlabel={$n$},
       
   565     x label style={at={(1.09,-0.15)}},
       
   566     ylabel={time in secs},
       
   567     scaled x ticks=false,
       
   568     enlargelimits=false,
       
   569     xtick distance=10000,
       
   570     xmax=44000, 
       
   571     ytick={0,10,...,30}, 
       
   572     ymax=35, 
       
   573     axis lines=left,
       
   574     width=7cm,
       
   575     height=3.5cm, 
       
   576     legend entries={Java \liningnums{9}+},
       
   577     legend pos=north west,
       
   578     legend cell align=left]
       
   579 \addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
       
   580 \end{axis}
       
   581 \end{tikzpicture}
       
   582 \end{center}
       
   583 
       
   584 \begin{center}
       
   585 \begin{tikzpicture}
       
   586   \begin{axis}[
       
   587     xlabel={$n$},
       
   588     x label style={at={(1.09,0.0)}},
       
   589     ylabel={time in secs},
       
   590     scaled x ticks=false,
       
   591     xtick distance=2000000,
       
   592     enlargelimits=false,
       
   593     xmax=6400000, 
       
   594     ytick={0,10,...,30},
       
   595     ymax=35,
       
   596     axis lines=left,
       
   597     width=7cm,
       
   598     height=3.5cm, 
       
   599     legend entries={Simple Scala},
       
   600     legend pos=north west,
       
   601     legend cell align=left]
       
   602 %\addplot[green,mark=square*,mark options={fill=white}] table {re2a.data};
       
   603 \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
       
   604 \end{axis}
       
   605 \end{tikzpicture}
       
   606 \end{center}
       
   607 
       
   608 Regex: \bl{$(a^*)^* \cdot b$} \space\space\space\space\space\space
       
   609 Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
       
   610 
       
   611 \end{frame}
       
   612 
       
   613 
       
   614 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   615 \begin{frame}[t]
       
   616 \frametitle{Optimization}
       
   617 
       
   618 \begin{center}
       
   619 \begin{tikzpicture}
       
   620 \begin{semilogyaxis}[
       
   621     xlabel={$n$},
       
   622     x label style={at={(1.05,0.0)}},   
       
   623     ylabel={regex size},
       
   624     enlargelimits=false,
       
   625     xtick={1,4,...,30},
       
   626     xmax=33,
       
   627     %ytick={0, 10,...,100},
       
   628     scaled ticks=false,
       
   629     axis lines=left,
       
   630     width=9cm,
       
   631     height=4.5cm, 
       
   632     legend entries={Simple Scala},  
       
   633     legend pos=  south east,
       
   634     legend cell align=left  
       
   635 ]
       
   636 \addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
       
   637 %\addplot[brown,mark=pentagon*, mark options={fill=white}] table {re-ssize.data};  
       
   638 %\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};  
       
   639 %\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   640 \end{semilogyaxis}
       
   641 \end{tikzpicture}
       
   642 \end{center}
       
   643 Regex: \bl{$(a+aa)^* \cdot b$} \space\space\space\space\space\space
       
   644 Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}\\
       
   645 \begin{itemize}
       
   646 \item
       
   647 Solution: Simplification%(Sulzmann \& Lu)
       
   648 \begin{itemize}
       
   649 \item $r+(r+s) = r+r+s = r+s$
       
   650 \item $1 \cdot r= r$
       
   651 \end{itemize}
       
   652 \end{itemize}
       
   653 \end{frame}
       
   654 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   655 \begin{frame}[t]
       
   656 \frametitle{Optimization}
       
   657 
       
   658 Regex: \bl{$(a+aa)^* \cdot b$} \space\space\space\space\space\space
       
   659 Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
       
   660 
       
   661 \begin{center}
       
   662 \begin{tikzpicture}
       
   663 \begin{axis}[
       
   664     xlabel={$n$},
       
   665     x label style={at={(1.05,0.0)}},   
       
   666     ylabel={regex size},
       
   667     enlargelimits=false,
       
   668     xtick={1,4,...,30},
       
   669     xmax=33,
       
   670     %ytick={0, 10,...,100},
       
   671     scaled ticks=false,
       
   672     axis lines=left,
       
   673     width=9cm,
       
   674     height=4.5cm, 
       
   675     legend entries={Simplification Applied},  
       
   676     legend pos=  south east,
       
   677     legend cell align=left  
       
   678 ]
       
   679 %\addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
       
   680 \addplot[black,mark=triangle*, mark options={fill=white}] table {re-ssize.data};  
       
   681 %\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};  
       
   682 %\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   683 \end{axis}
       
   684 \end{tikzpicture}
       
   685 \end{center}
       
   686 \begin{itemize}
       
   687 \item
       
   688 Lexer? Not just matcher
       
   689 \begin{itemize}
       
   690 \item
       
   691 Bit-Coded Regular Expression Parsing (Lasse Nielsen, Fritz Henglein 2011)
       
   692 \item
       
   693 Bit-Coded Regular Expression Parsing with Simplification (Sulzmann and Lu 2014)
       
   694 \end{itemize}
       
   695 
       
   696 \end{itemize}
       
   697 \end{frame}
       
   698 
       
   699 
       
   700 
       
   701 
       
   702 
       
   703 
       
   704 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   705 
       
   706 \begin{frame}[t]
       
   707 \frametitle{Ongoing Work}
       
   708 
       
   709 
       
   710 \begin{tikzpicture}
       
   711 \begin{semilogyaxis}[
       
   712     xlabel={$n$},
       
   713     x label style={at={(1.05,0.0)}},   
       
   714     ylabel={regex size},
       
   715     enlargelimits=false,
       
   716     xtick={1,4,...,30},
       
   717     xmax=33,
       
   718     %ytick={0, 10,...,100},
       
   719     scaled ticks=false,
       
   720     axis lines=left,
       
   721     width=9cm,
       
   722     height=4.5cm, 
       
   723     legend entries={Naive, Simp},  
       
   724     legend pos= north west,
       
   725     legend cell align=left  
       
   726 ]
       
   727 \addplot[black,mark=square*, mark options={fill=white}] table {re-usize.data};
       
   728 \addplot[black,mark=triangle*, mark options={fill=white}] table {re-ssize.data};  
       
   729 %\addplot[red,mark=triangle*,mark options={fill=white}] table {re1.data};  
       
   730 %\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
       
   731 \end{semilogyaxis}
       
   732 \end{tikzpicture}
       
   733 
       
   734 \begin{itemize}
       
   735 \item
       
   736 Correctness of Sulzmann \& Lu's algorithm
       
   737 
       
   738 \item
       
   739 Size Bound $O(n^2t^2)$ of Dervatives (1995 Antimirov "Partial Derivative")
       
   740 \item
       
   741 More radical simplifications.
       
   742 \item
       
   743 Extend the algorithm to back-references.
       
   744 \end{itemize}
       
   745 \end{frame}
       
   746 
       
   747 
       
   748 
       
   749 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
       
   750 
       
   751 
       
   752 
       
   753 
       
   754 
       
   755 
       
   756 
   899 
   757 
   900 
   758 
   901 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   759 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   902 \begin{frame}[c]
   760 \begin{frame}[c]
   903 \frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
   761 \frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}