876
+ − 1
% !TEX program = xelatex
+ − 2
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}
+ − 3
\usepackage{../slides}
+ − 4
\usepackage{../graphicss}
+ − 5
\usepackage{../langs}
+ − 6
\usepackage{../data}
+ − 7
\usetikzlibrary{cd}
930
+ − 8
\usepackage{listings-rust}
876
+ − 9
+ − 10
+ − 11
\usepackage{tcolorbox}
+ − 12
\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
+ − 13
\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
+ − 14
\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
+ − 15
+ − 16
+ − 17
+ − 18
\hfuzz=220pt
+ − 19
+ − 20
\lstset{language=Scala,
+ − 21
style=mystyle,
+ − 22
numbersep=0pt,
+ − 23
numbers=none,
+ − 24
xleftmargin=0mm}
+ − 25
+ − 26
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
+ − 27
+ − 28
% beamer stuff
+ − 29
\renewcommand{\slidecaption}{CFL 01, King's College London}
+ − 30
+ − 31
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect1/main.pdf
+ − 32
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect2/main.pdf
+ − 33
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect3/main.pdf
+ − 34
929
+ − 35
876
+ − 36
\begin{document}
+ − 37
+ − 38
%\begin{frame}[t]
+ − 39
%
+ − 40
%\begin{mybox}
+ − 41
%A physical explanation the \emph{dynamic matrix}\\
+ − 42
%lots of text
+ − 43
%\end{mybox}
+ − 44
+ − 45
+ − 46
%\begin{mybox2}{Test}
+ − 47
%A physical explanation the \emph{dynamic matrix}\\
+ − 48
%lots of text
+ − 49
%\end{mybox2}
+ − 50
+ − 51
%\begin{mybox3}{Test}
+ − 52
%A physical explanation the \emph{dynamic matrix}\\
+ − 53
%lots of text
+ − 54
%\end{mybox3}
906
+ − 55
% \end{frame}
+ − 56
+ − 57
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
922
+ − 58
%\begin{frame}[t]
+ − 59
%\frametitle{%
+ − 60
% \begin{tabular}{@ {}c@ {}}
+ − 61
% \\[-3mm]
+ − 62
% \LARGE Lunch with a Lecturer (29 March)\\[5mm]
+ − 63
% \end{tabular}}
+ − 64
%
+ − 65
% I teach CFL (compilers) and PEP (Scala)\bigskip
+ − 66
%
+ − 67
% \begin{itemize}
+ − 68
% \item did undergraduate in Germany
+ − 69
% \item Master in St Andrews
+ − 70
% \item PhD in Cambridge
+ − 71
% \end{itemize}\bigskip\bigskip
+ − 72
%
+ − 73
% use mainly the Isabelle theorem prover in my work (see 6CCS3VER)
+ − 74
%
+ − 75
% write code in functional programming languages (Scala, SML, Ocaml, Haskell)
+ − 76
%\end{frame}
906
+ − 77
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 78
922
+ − 79
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 80
% \begin{frame}[c]
+ − 81
% \frametitle{Why Bother with Regexes?}
906
+ − 82
922
+ − 83
% \begin{columns}[t,onlytextwidth]
+ − 84
% \begin{column}{1.8cm}
+ − 85
% \mbox{}
+ − 86
% \end{column}
+ − 87
% \begin{column}{.5\textwidth}
+ − 88
% \small{}Ruby, Python, Java 8\medskip\\
+ − 89
% \begin{tikzpicture}\footnotesize
+ − 90
% \begin{axis}[
+ − 91
% xlabel={$n$},
+ − 92
% x label style={at={(1.05,0.0)}},
+ − 93
% ylabel={time in secs},
+ − 94
% enlargelimits=false,
+ − 95
% xtick={0,5,...,30},
+ − 96
% xmax=33,
+ − 97
% ymax=35,
+ − 98
% ytick={0,5,...,30},
+ − 99
% scaled ticks=false,
+ − 100
% axis lines=left,
+ − 101
% width=\textwidth,
+ − 102
% height=4cm,
+ − 103
% legend entries={Python,Ruby},
+ − 104
% legend pos=north west,
+ − 105
% legend cell align=left]
+ − 106
% \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
+ − 107
% \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
+ − 108
% \end{axis}
+ − 109
% \end{tikzpicture}
+ − 110
% \begin{tikzpicture}\footnotesize
+ − 111
% \begin{axis}[
+ − 112
% xlabel={$n$},
+ − 113
% x label style={at={(1.05,0.0)}},
+ − 114
% ylabel={time in secs},
+ − 115
% enlargelimits=false,
+ − 116
% xtick={0,5,...,30},
+ − 117
% xmax=33,
+ − 118
% ymax=35,
+ − 119
% ytick={0,5,...,30},
+ − 120
% scaled ticks=false,
+ − 121
% axis lines=left,
+ − 122
% width=\textwidth,
+ − 123
% height=4cm,
+ − 124
% legend entries={Python, Java 8, JavaScript, Swift},
+ − 125
% legend pos=north west,
+ − 126
% legend cell align=left]
+ − 127
% \addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+ − 128
% \addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+ − 129
% \addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
+ − 130
% \addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
+ − 131
% \end{axis}
+ − 132
% \end{tikzpicture}
+ − 133
% %
+ − 134
% \end{column}
+ − 135
% \begin{column}{.5\textwidth}
+ − 136
% \small{}In PEP \& CFL \medskip\\
+ − 137
% \begin{tikzpicture}\footnotesize
+ − 138
% \begin{axis}[
+ − 139
% xlabel={$n$},
+ − 140
% x label style={at={(1.07,0.0)}},
+ − 141
% ylabel={time in secs},
+ − 142
% enlargelimits=false,
+ − 143
% xtick={0,5000,...,10000},
+ − 144
% xmax=11000,
+ − 145
% ymax=35,
+ − 146
% ytick={0,5,...,30},
+ − 147
% scaled ticks=false,
+ − 148
% axis lines=left,
+ − 149
% width=\textwidth,
+ − 150
% height=4cm]
+ − 151
% \addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
+ − 152
% \addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
+ − 153
% \end{axis}
+ − 154
% \end{tikzpicture}
+ − 155
% \begin{tikzpicture}\footnotesize
+ − 156
% \begin{axis}[
+ − 157
% xlabel={$n$},
+ − 158
% x label style={at={(1.07,0.0)}},
+ − 159
% ylabel={time in secs},
+ − 160
% enlargelimits=false,
+ − 161
% ymax=35,
+ − 162
% ytick={0,5,...,30},
+ − 163
% scaled ticks=false,
+ − 164
% axis lines=left,
+ − 165
% width=\textwidth,
+ − 166
% height=4cm]
+ − 167
% \addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
+ − 168
% \end{axis}
+ − 169
% \end{tikzpicture}
+ − 170
% \end{column}
+ − 171
% \end{columns}
+ − 172
% \medskip
906
+ − 173
922
+ − 174
% \begin{textblock}{3}(-0.1,3.3)
+ − 175
% \small\hfill\bl{\texttt{[a?]\{n\}[a]\{n\}}}:
+ − 176
% \end{textblock}
906
+ − 177
922
+ − 178
% \begin{textblock}{3}(-0.1,8.7)
+ − 179
% \small\hfill\bl{\texttt{(a*)*b}}:
+ − 180
% \end{textblock}
906
+ − 181
922
+ − 182
% \begin{textblock}{3}(0.3,13)
+ − 183
% \small{}matching with strings
+ − 184
% \bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
+ − 185
% \end{textblock}
906
+ − 186
922
+ − 187
% \end{frame}
+ − 188
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
906
+ − 189
+ − 190
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
922
+ − 191
% \begin{frame}[c,fragile]
+ − 192
% \frametitle{Incidents}
906
+ − 193
922
+ − 194
% \begin{itemize}
+ − 195
% \item a global outage on 2 July 2019 at \textbf{Cloudflare}
+ − 196
% (first one for six years)\medskip
906
+ − 197
922
+ − 198
% \begin{center}\small\color{blue}
+ − 199
% \begin{verbatim}
+ − 200
% (?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
+ − 201
% null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
+ − 202
% |-|~|!|{}|\|\||\+)*.*(?:.*=.*)))
+ − 203
% \end{verbatim}
+ − 204
% \end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip
906
+ − 205
922
+ − 206
% \item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
+ − 207
% because of an evil regular expression
+ − 208
% \here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+ − 209
% \end{itemize}
906
+ − 210
922
+ − 211
% \begin{textblock}{6}(6,7.6)
+ − 212
% \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
+ − 213
% \footnotesize
+ − 214
% It serves more web traffic than Twitter, Amazon, Apple,
+ − 215
% Instagram, Bing \& Wikipedia combined.
+ − 216
% \here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}
+ − 217
% \end{textblock}
906
+ − 218
922
+ − 219
% \end{frame}
+ − 220
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
906
+ − 221
922
+ − 222
% %\begin{frame}[c]
+ − 223
% %
+ − 224
% %\frametitle{}
+ − 225
% %\begin{mybox3}{}\it
+ − 226
% ``This conversation is interesting to me, and I've researched it a little bit... I also disagree with Dr. Urban on the cost/benefit of non-GC languages...[..]\\
+ − 227
%
+ − 228
% But regardless, Scala is a lot slower than, say, C or Rust. To say it's not is basically wrong (imo)....[..]
+ − 229
% ''\\
+ − 230
%\mbox{}\hfill-- Oliver Iliffe, discussion this year in PEP
+ − 231
%\end{mybox3}\pause
+ − 232
%
+ − 233
%\end{frame}
906
+ − 234
922
+ − 235
%\begin{frame}<1-10>
+ − 236
%\end{frame}
876
+ − 237
+ − 238
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 239
\begin{frame}[t]
+ − 240
\frametitle{%
+ − 241
\begin{tabular}{@ {}c@ {}}
+ − 242
\\[-3mm]
+ − 243
\LARGE Compilers and \\[-1mm]
880
+ − 244
\LARGE Formal Languages\\[-5mm]
876
+ − 245
\end{tabular}}
+ − 246
880
+ − 247
%\begin{center}
876
+ − 248
%\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
+ − 249
%\includegraphics[scale=0.31]{pics/ante2.jpg}\\
+ − 250
%\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
880
+ − 251
%\end{center}
876
+ − 252
+ − 253
\normalsize
+ − 254
\begin{center}
+ − 255
\begin{tabular}{ll}
+ − 256
Email: & christian.urban at kcl.ac.uk\\
930
+ − 257
Office Hour: & Thurdays 15 -- 16\\
880
+ − 258
Location: & N7.07 (North Wing, Bush House)\\
876
+ − 259
Slides \& Progs: & KEATS\\
880
+ − 260
Pollev: & \texttt{\alert{https://pollev.com/cfltutoratki576}}\\
876
+ − 261
\end{tabular}
+ − 262
\end{center}
+ − 263
+ − 264
\begin{center}
+ − 265
\begin{tikzpicture}
+ − 266
\node[drop shadow,fill=white,inner sep=0pt]
+ − 267
{\footnotesize\rowcolors{1}{capri!10}{white}
+ − 268
\begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
+ − 269
\cellcolor{blue!50}
+ − 270
1 Introduction, Languages & 6 While-Language \\
+ − 271
2 Regular Expressions, Derivatives & 7 Compilation, JVM \\
+ − 272
3 Automata, Regular Languages & 8 Compiling Functional Languages \\
+ − 273
4 Lexing, Tokenising & 9 Optimisations \\
+ − 274
5 Grammars, Parsing & 10 LLVM \\ \hline
+ − 275
\end{tabular}%
+ − 276
};
+ − 277
\end{tikzpicture}
+ − 278
\end{center}
+ − 279
+ − 280
\end{frame}
+ − 281
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 282
+ − 283
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 284
\begin{frame}<1-12>[c]
+ − 285
\frametitle{The Goal of this Module\ldots}
+ − 286
+ − 287
\begin{center}
+ − 288
\begin{tikzpicture}[scale=1,
+ − 289
node/.style={
+ − 290
rectangle,rounded corners=3mm,
+ − 291
very thick,draw=black!50,minimum height=18mm, minimum width=20mm,
+ − 292
top color=white,bottom color=black!20,drop shadow}]
+ − 293
+ − 294
\node at (3.05, 1.8) {\Large\bf \ldots{} you write a compiler};
+ − 295
+ − 296
\node (0) at (-2.3,0) {};
+ − 297
\node [above=5mm of 0]
+ − 298
{\makebox[0mm]{\footnotesize
+ − 299
\begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}};
+ − 300
+ − 301
\node (A) at (0,0) [node] {};
+ − 302
\node [below right] at (A.north west) {lexer};
+ − 303
+ − 304
\node (B) at (3,0) [node] {};
+ − 305
\node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
+ − 306
+ − 307
\node (C) at (6,0) [node] {};
+ − 308
\node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
+ − 309
+ − 310
\node (1) at (8.4,0) {};
+ − 311
\node [above=5mm of 1]
+ − 312
{\makebox[0mm]{\footnotesize
+ − 313
\begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
+ − 314
929
+ − 315
\draw [->,line width=3mm] (0) -- (A);
+ − 316
\draw [->,line width=3mm] (A) -- (B);
+ − 317
\draw [->,line width=3mm] (B) -- (C);
+ − 318
\draw [->,line width=3mm] (C) -- (1);
876
+ − 319
\end{tikzpicture}
+ − 320
\end{center}
+ − 321
+ − 322
\only<2,3,4>{
+ − 323
\begin{textblock}{1}(1,2.1)
+ − 324
\begin{bubble}[9.8cm]
+ − 325
\normalsize
+ − 326
lexer input: a string\smallskip\\
+ − 327
\hspace{5mm}\code{"read(n);"}\medskip\\
+ − 328
lexer output: a sequence of tokens\smallskip\\
+ − 329
\hspace{5mm}\code{key(read) lpar id(n) rpar semi}
+ − 330
\end{bubble}
+ − 331
\end{textblock}}
+ − 332
+ − 333
\only<3,4>{
+ − 334
\begin{textblock}{1}(6,7.8)
+ − 335
\begin{tabular}{c}
+ − 336
\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm]
+ − 337
\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)
+ − 338
\end{tabular}
+ − 339
\end{textblock}}
+ − 340
+ − 341
\only<4>{
+ − 342
\begin{textblock}{1}(0.5,12)\small
+ − 343
\begin{tabular}{l@{}c@{}l}
+ − 344
\pcode{if} & $\;\Rightarrow\;$ & keyword\\
+ − 345
\pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
+ − 346
\end{tabular}
+ − 347
\end{textblock}}
+ − 348
+ − 349
\only<6>{
+ − 350
\begin{textblock}{1}(1,1.5)
+ − 351
\begin{bubble}[8.5cm]
+ − 352
\normalsize
+ − 353
parser input: a sequence of tokens\smallskip\\
+ − 354
+ − 355
{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\
+ − 356
+ − 357
parser output: an abstract syntax tree\smallskip\\
+ − 358
\footnotesize
+ − 359
\hspace{2cm}\begin{tikzpicture}
+ − 360
\node {\code{read}}
+ − 361
child {node {\code{lpar}}}
+ − 362
child {node {\code{n}}}
+ − 363
child {node {\code{rpar}}};
+ − 364
\end{tikzpicture}
+ − 365
\end{bubble}
+ − 366
\end{textblock}}
+ − 367
+ − 368
\only<8,9>{
+ − 369
\begin{textblock}{1}(1,1.5)
+ − 370
\begin{bubble}[4cm]
+ − 371
\normalsize
+ − 372
code generation:\smallskip\\
+ − 373
\hspace{5mm}\code{istore 2}\\
+ − 374
\hspace{5mm}\code{iload 2}\\
+ − 375
\hspace{5mm}\code{ldc 10}\\
+ − 376
\hspace{5mm}\code{isub}\\
+ − 377
\hspace{5mm}\code{ifeq Label2}\\
+ − 378
\hspace{5mm}\code{iload 2}\\
+ − 379
\hspace{5mm}\code{...}\\
+ − 380
\end{bubble}
+ − 381
\end{textblock}}
+ − 382
+ − 383
\only<9>{
+ − 384
\begin{textblock}{6}(8.4,7)
+ − 385
\begin{bubble}[5cm]
+ − 386
\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
+ − 387
\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
+ − 388
xlabel=n,
+ − 389
enlargelimits=0.05,
+ − 390
ybar interval=0.7, legend style=small]
+ − 391
\addplot file {interpreted2.data};
+ − 392
\addplot file {compiled2.data};
+ − 393
%\legend{interpreted, compiled}
+ − 394
\end{axis}
+ − 395
\end{tikzpicture}}
+ − 396
\end{bubble}
+ − 397
\end{textblock}}
+ − 398
+ − 399
\only<10>{
+ − 400
\begin{textblock}{6}(1,3)
+ − 401
\begin{bubble}[11cm]
+ − 402
Compiler explorers, e.g.: \url{https://gcc.godbolt.org} \;\video{https://youtu.be/ysaBmhMEyUg}
+ − 403
\begin{tikzpicture}[]
+ − 404
\node (0) at (-2.3,0) {\includegraphics[scale=0.3]{pics/csource.png}};
+ − 405
\node (1) [right=35mm] at (0) {\includegraphics[scale=0.3]{pics/cassmbl.png}};
+ − 406
\draw [->,line width=4mm, red] (0) -- (1);
+ − 407
\node (2) [below=20mm] at (0) {\LARGE\bf source};
+ − 408
\node (3) [right=40mm] at (2) {\LARGE\bf binary};
+ − 409
\draw [->,line width=1mm] (2) -- (3);
+ − 410
\end{tikzpicture}
+ − 411
\end{bubble}
+ − 412
+ − 413
\end{textblock}}
+ − 414
\only<11>{
+ − 415
\begin{textblock}{6}(1,3)
+ − 416
\begin{bubble}[11cm]
+ − 417
Compiler explorer for Java: \url{https://javap.yawk.at}
+ − 418
\begin{tikzpicture}[]
+ − 419
\node (0) at (-2.3,0) {\includegraphics[scale=0.4]{pics/jsource.png}};
+ − 420
\node (1) [right=35mm] at (0) {\includegraphics[scale=0.4]{pics/jassmbl.png}};
+ − 421
\draw [->,line width=4mm, red] (0) -- (1);
+ − 422
\node (2) [below=20mm] at (0) {\LARGE\bf source};
+ − 423
\node (3) [right=40mm] at (2) {\LARGE\bf byte code};
+ − 424
\draw [->,line width=1mm] (2) -- (3);
+ − 425
\end{tikzpicture}
+ − 426
\end{bubble}
+ − 427
\end{textblock}}
+ − 428
+ − 429
+ − 430
\end{frame}
+ − 431
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 432
+ − 433
+ − 434
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 435
\begin{frame}[t]
+ − 436
\frametitle{Why Study Compilers?}
+ − 437
+ − 438
+ − 439
John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}
+ − 440
\here{https://blog.regehr.org/archives/1419}
+ − 441
\smallskip\\
+ − 442
+ − 443
\begin{bubble}[10.5cm]
+ − 444
\bf ``\ldots{}It’s effectively a perpetual
+ − 445
employment act for solid compiler hackers.''
+ − 446
\end{bubble}
+ − 447
+ − 448
\onslide<1->{
+ − 449
\only<2>{
+ − 450
\begin{itemize}
+ − 451
\item {\bf Hardware is getting weirder
+ − 452
rather than getting clocked faster.}
+ − 453
+ − 454
\begin{itemize}
+ − 455
\item[] ``Almost all processors are multicores nowadays and it looks
+ − 456
like there is increasing asymmetry in resources across cores.
+ − 457
Processors come with vector units, crypto accelerators etc. We have
+ − 458
DSPs, GPUs, ARM big.little, and Xeon Phi. This is only scratching the
+ − 459
surface.''
+ − 460
\end{itemize}
+ − 461
\end{itemize}}
+ − 462
\only<3>{
+ − 463
\begin{itemize}
+ − 464
\item {\bf We’re getting tired of low-level languages and
+ − 465
their associated security disasters.}
+ − 466
+ − 467
\begin{itemize}
+ − 468
\item [] ``We want to write new code, to whatever extent possible, in
+ − 469
safer, higher-level languages. Compilers are caught right in the
+ − 470
middle of these opposing trends: one of their main jobs is to help
+ − 471
bridge the large and growing gap between increasingly high-level
+ − 472
languages and increasingly wacky platforms.''
+ − 473
\end{itemize}
+ − 474
\end{itemize}}}
+ − 475
+ − 476
\end{frame}
+ − 477
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 478
879
+ − 479
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
930
+ − 480
% {
+ − 481
% \setbeamercolor{background canvas}{bg=cream}
+ − 482
% \begin{frame}[c]
879
+ − 483
930
+ − 484
% \frametitle{}
+ − 485
% \begin{mybox3}{}\it
+ − 486
% ``I enjoyed the module - it was genuinely the stand out academic
+ − 487
% experience of my undergraduate degree, and very much influenced my
+ − 488
% career interests. In fact I am currently working at ARM, in their Open
+ − 489
% Source Software group, on AArch64 specific optimisations for the
+ − 490
% Java/Kotlin compiler that forms part of the Android Runtime.''\\
+ − 491
% \mbox{}\hfill-- Hari Limaye in year 2021/22
+ − 492
% \end{mybox3}\pause
879
+ − 493
+ − 494
930
+ − 495
% Student numbers in CFL\medskip\\
+ − 496
% \begin{tabular}{ll}
+ − 497
% 2019: & 32\\
+ − 498
% 2020: & 59\\
+ − 499
% 2021: & 109\\
+ − 500
% 2022: & 121\\
+ − 501
% \end{tabular}
879
+ − 502
930
+ − 503
% \end{frame}
+ − 504
% }
879
+ − 505
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 506
+ − 507
+ − 508
876
+ − 509
+ − 510
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 511
\begin{frame}[c]
+ − 512
\frametitle{Why Bother with Compilers?}
+ − 513
+ − 514
\textbf{Boeing 777's}: First flight in 1994. They want to achieve
+ − 515
triple redundancy for potential hardware faults.
+ − 516
\here{http://www.citemaster.net/get/db3a81c6-548e-11e5-9d2e-00163e009cc7/R8.pdf}\bigskip
+ − 517
+ − 518
They compile 1 Ada program to\medskip
+ − 519
+ − 520
\begin{itemize}
+ − 521
\item Intel 80486
+ − 522
\item Motorola 68040 (old Macintosh's)
+ − 523
\item AMD 29050 (RISC chips used often in laser printers)
+ − 524
\end{itemize}\medskip\medskip
+ − 525
+ − 526
using 3 independent compilers.\bigskip\pause
+ − 527
+ − 528
\small Airbus uses C and static analysers. Recently started using CompCert.
+ − 529
+ − 530
\only<1->{%
+ − 531
\begin{textblock}{6}(8,4.5)
+ − 532
\includegraphics[scale=0.28]{../pics/777.png}
+ − 533
\end{textblock}}
+ − 534
+ − 535
\end{frame}
+ − 536
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 537
+ − 538
+ − 539
+ − 540
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 541
\begin{frame}[c]
+ − 542
\frametitle{What Do Compilers Do?}
+ − 543
+ − 544
Remember BF*** from PEP?
+ − 545
+ − 546
\begin{center}
+ − 547
\begin{tabular}{rcl}
+ − 548
\bl{\texttt{>}} & $\Rightarrow$ & move one cell right\\
+ − 549
\bl{\texttt{<}} & $\Rightarrow$ & move one cell left\\
+ − 550
\bl{\texttt{+}} & $\Rightarrow$ & increase cell by one\\
+ − 551
\bl{\texttt{-}} & $\Rightarrow$ & decrease cell by one\\
+ − 552
\bl{\texttt{.}} & $\Rightarrow$ & print current cell\\
+ − 553
\bl{\texttt{,}} & $\Rightarrow$ & input current cell\\
+ − 554
\bl{\texttt{[}} & $\Rightarrow$ & loop begin\\
+ − 555
\bl{\texttt{]}} & $\Rightarrow$ & loop end\medskip\\
+ − 556
& $\Rightarrow$ & everything else is a comment\\
+ − 557
\end{tabular}
+ − 558
\end{center}
+ − 559
+ − 560
\end{frame}
+ − 561
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 562
+ − 563
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 564
\begin{frame}[c]
+ − 565
\frametitle{A ``Compiler'' for BF*** to C}
+ − 566
+ − 567
\begin{center}
+ − 568
\begin{tabular}{rcl}
+ − 569
\bl{\texttt{>}} & $\Rightarrow$ & \texttt{ptr++}\\
+ − 570
\bl{\texttt{<}} & $\Rightarrow$ & \texttt{ptr--}\\
+ − 571
\bl{\texttt{+}} & $\Rightarrow$ & \texttt{(*ptr)++}\\
+ − 572
\bl{\texttt{-}} & $\Rightarrow$ & \texttt{(*ptr)--}\\
+ − 573
\bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
+ − 574
\bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
+ − 575
\bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
+ − 576
\bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
+ − 577
& $\Rightarrow$ & ignore everything else\\
+ − 578
\end{tabular}
+ − 579
\end{center}\bigskip
+ − 580
+ − 581
\texttt{char field[30000]\\ char *ptr = \&field[15000]}
+ − 582
+ − 583
\end{frame}
+ − 584
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 585
+ − 586
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 587
\begin{frame}[c]
+ − 588
\frametitle{Another~``Compiler''~for~BF~to~C}
+ − 589
+ − 590
\begin{center}
+ − 591
\begin{tabular}{rcl}
+ − 592
\bl{\texttt{>\ldots>}} & $\Rightarrow$ & \texttt{ptr += n}\\
+ − 593
\bl{\texttt{<\ldots<}} & $\Rightarrow$ & \texttt{ptr -= n}\\
+ − 594
\bl{\texttt{+\ldots+}} & $\Rightarrow$ & \texttt{(*ptr) += n}\\
+ − 595
\bl{\texttt{-\ldots-}} & $\Rightarrow$ & \texttt{(*ptr) -= n}\\
+ − 596
\bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
+ − 597
\bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
+ − 598
\bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
+ − 599
\bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
+ − 600
& $\Rightarrow$ & ignore everything else\\
+ − 601
\end{tabular}
+ − 602
\end{center}\bigskip
+ − 603
+ − 604
\texttt{char field[30000]\\ char *ptr = \&field[15000]}
+ − 605
+ − 606
\end{frame}
+ − 607
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 608
+ − 609
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 610
\begin{frame}[t]
+ − 611
\frametitle{A Brief Compiler History}
+ − 612
+ − 613
\bigskip
+ − 614
\begin{itemize}
+ − 615
\item Turing Machines, 1936 (a tape as memory)
+ − 616
\item Regular Expressions, 1956\\
+ − 617
\item The first compiler for COBOL, 1957\\ (Grace Hopper)\medskip
+ − 618
\item But surprisingly research papers are still published nowadays\\
+ − 619
\item ``Parsing: The Solved Problem That Isn't''
+ − 620
\here{https://tratt.net/laurie/blog/entries/parsing_the_solved_problem_that_isnt.html}
+ − 621
\end{itemize}
+ − 622
+ − 623
+ − 624
\begin{textblock}{8.5}(5,7.6)
+ − 625
\begin{flushright}
+ − 626
\includegraphics[scale=0.3]{pics/hopper.jpg}\\
+ − 627
\footnotesize\textcolor{gray}{Grace Hopper}\smallskip\\
+ − 628
+ − 629
{\small\textcolor{gray}{(she made it to David Letterman's Tonight Show
931
+ − 630
\here{https://youtu.be/3N_ywhx6_K0?t=31} /
+ − 631
\here{https://www.youtube.com/watch?v=oE2uls6iIEU})}}
876
+ − 632
\end{flushright}
+ − 633
\end{textblock}
+ − 634
+ − 635
\end{frame}
+ − 636
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 637
+ − 638
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 639
\begin{frame}[c]
+ − 640
\frametitle{Some Housekeeping}
+ − 641
930
+ − 642
\textbf{Exam will be computer-based, invigilated in some big examination hall:}\bigskip
876
+ − 643
+ − 644
\begin{itemize}
+ − 645
\item final exam in January (35\%)
+ − 646
\item five CWs (65\%)
+ − 647
\end{itemize}\bigskip\bigskip\pause
+ − 648
+ − 649
+ − 650
\textbf{Weekly Homework (optional):}
+ − 651
\begin{itemize}
+ − 652
\item uploaded on KEATS, send answers via email, (try to!) respond individually
+ − 653
\item \alert{\bf all} questions in the exam will be from the HWs!!
+ − 654
\end{itemize}
+ − 655
+ − 656
\end{frame}
+ − 657
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 658
879
+ − 659
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
930
+ − 660
{\definecolor{rred}{HTML}{C0504D}
+ − 661
\setbeamercolor{background canvas}{bg=cream}
+ − 662
\begin{frame}[c]
+ − 663
\frametitle{Students in CFL}
+ − 664
+ − 665
\begin{center}
+ − 666
\begin{tikzpicture}
+ − 667
\begin{axis}[symbolic x coords={2016,2017,2018,2019,2020,2021,2022,2023},
+ − 668
width = \textwidth,
+ − 669
height = 5cm,
+ − 670
bar width=8mm,
+ − 671
nodes near coords,
+ − 672
axis lines = left,
+ − 673
text=black,
+ − 674
ymin=0,
+ − 675
clip=false,
+ − 676
hide y axis,
+ − 677
axis line style={-},
+ − 678
name=mygraph
+ − 679
]
+ − 680
+ − 681
\addplot[ybar,style={rred,fill=rred!75,mark=none},text=black] coordinates {
+ − 682
(2023,183)
+ − 683
(2022,112)
+ − 684
(2021,98)
+ − 685
(2020,59)
+ − 686
(2019,38)
+ − 687
(2018,20)
+ − 688
(2017,22)
+ − 689
(2016,8)};
+ − 690
\end{axis}
+ − 691
\node[anchor=north, yshift=-10mm] at (mygraph.south) {\small{}Student numbers since the start of the compiler module.};
+ − 692
+ − 693
\end{tikzpicture}
+ − 694
\end{center}
+ − 695
+ − 696
+ − 697
\end{frame}
+ − 698
}
+ − 699
+ − 700
+ − 701
+ − 702
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
879
+ − 703
{
+ − 704
\setbeamercolor{background canvas}{bg=cream}
+ − 705
\begin{frame}[c]
+ − 706
\frametitle{Homework}
+ − 707
930
+ − 708
Until 2 years ago: I did not give out solutions; students
+ − 709
sent emails to me and I responded to them individually.\bigskip\\
879
+ − 710
+ − 711
922
+ − 712
Since last year: We will review the homework mainly during the SGTs.\bigskip\\\pause
879
+ − 713
+ − 714
I will still choose the questions from the HW for the exam, but there might be
+ − 715
some larger amount of deviation.
+ − 716
+ − 717
\end{frame}
+ − 718
}
+ − 719
876
+ − 720
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 721
\begin{frame}[c]
+ − 722
\frametitle{Some Housekeeping}
+ − 723
+ − 724
\textbf{Coursework (5 accounting for 65\%):}\bigskip
+ − 725
+ − 726
\begin{itemize}
+ − 727
\item matcher (5\%)
+ − 728
\item lexer (10\%)
+ − 729
\item parser / interpreter (10\%)
+ − 730
\item JVM compiler (15\%)
+ − 731
\item LLVM compiler (25\%)
+ − 732
\end{itemize}\bigskip\pause
+ − 733
930
+ − 734
you can use \alert{any} programming language you like (Haskell, Rust, Swift)\\\pause
876
+ − 735
you can use any code I show you and is uploaded to KEATS\ldots\textbf{BUT NOTHING ELSE!}
+ − 736
+ − 737
\end{frame}
+ − 738
930
+ − 739
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 740
{
+ − 741
\setbeamercolor{background canvas}{bg=cream}
+ − 742
\begin{frame}[c,fragile]
+ − 743
\frametitle{Ammonite \& Scala 3}
+ − 744
+ − 745
I will show you all my code in Amm / Scala 3
+ − 746
+ − 747
\begin{minipage}{1.4\textwidth}
+ − 748
\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+ − 749
$ amm
+ − 750
Loading...
+ − 751
Welcome to the Ammonite Repl 2.5.9 (Scala 3.2.2 Java 17.0.7)
+ − 752
scala> 1 + 2
+ − 753
res0: Int = 3
+ − 754
\end{lstlisting} %% $
+ − 755
\end{minipage}\medskip
+ − 756
\pause
+ − 757
+ − 758
Do not use Amm + Scala 2!
+ − 759
+ − 760
\begin{minipage}{1.4\textwidth}
+ − 761
\begin{lstlisting}[language={},numbers=none,basicstyle=\ttfamily\small]
+ − 762
$ amm2
+ − 763
Loading...
+ − 764
Welcome to the Ammonite Repl 2.5.9 (Scala 2.13.11 Java 17.0.7)
+ − 765
scala>
+ − 766
\end{lstlisting} %% $
+ − 767
\end{minipage}
+ − 768
\end{frame}
+ − 769
}
+ − 770
+ − 771
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 772
{
+ − 773
\setbeamercolor{background canvas}{bg=cream}
+ − 774
\begin{frame}[c]
931
+ − 775
\frametitle{For Install Problems}
930
+ − 776
+ − 777
\begin{itemize}
931
+ − 778
\item Harry Dilnot (harry.dilnot@kcl.ac.uk) \\
+ − 779
\;\;Windows expert
+ − 780
\item Meilai Ji (meilai.ji@kcl.ac.uk)
930
+ − 781
\end{itemize}
+ − 782
+ − 783
\end{frame}
+ − 784
}
+ − 785
931
+ − 786
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 787
{
+ − 788
\setbeamercolor{background canvas}{bg=cream}
+ − 789
\begin{frame}[c]
+ − 790
+ − 791
\begin{minipage}{1.3\textwidth}
+ − 792
\begin{mybox3}{}\it\small
+ − 793
Unequivocally the worst module I've taken on this course. The subject
+ − 794
matter is fascinating, however the insistence on the use of this
+ − 795
abomination of a language "Scala" completely ruins it. If you're going
+ − 796
to teach something as complex as this, use a proper language, not some
+ − 797
"object oriented functional" abomination. Use C, you know, the
+ − 798
language that real compilers are written in. I will go to the end of
+ − 799
the earth to dissuade others from taking this module so long as Scala
+ − 800
is still being used.\\
+ − 801
\mbox{}\hfill-- Lone voice in the end-of-year feedback in 2019
+ − 802
\end{mybox3}
+ − 803
\end{minipage}\bigskip
+ − 804
+ − 805
\small (for alternative opinions check ``What the students say'' on KEATS)
+ − 806
+ − 807
\end{frame}
+ − 808
}
+ − 809
+ − 810
930
+ − 811
876
+ − 812
+ − 813
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 814
\begin{frame}[c]
+ − 815
\frametitle{Lectures 1 - 5}
+ − 816
+ − 817
transforming strings into structured data\\[10mm]
+ − 818
+ − 819
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
+ − 820
\hspace{5mm}(recognising ``words'')\\[6mm]
+ − 821
+ − 822
{\LARGE\bf Parsing}\medskip\\
+ − 823
\hspace{5mm}(recognising ``sentences'')
+ − 824
+ − 825
\begin{textblock}{1}(10,9.1)
+ − 826
\begin{tabular}{c}
+ − 827
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
+ − 828
\footnotesize Stone of Rosetta
+ − 829
\end{tabular}
+ − 830
\end{textblock}
+ − 831
+ − 832
\end{frame}
+ − 833
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 834
+ − 835
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 836
\begin{frame}[c]
+ − 837
\frametitle{Lectures 1 - 5}
+ − 838
+ − 839
transforming strings into structured data\\[10mm]
+ − 840
+ − 841
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
+ − 842
\hspace{5mm}(recognising ``words'')\\[6mm]
+ − 843
+ − 844
{\LARGE\bf Parsing}\medskip\\
+ − 845
\hspace{5mm}(recognising ``sentences'')
+ − 846
+ − 847
\begin{textblock}{1}(10,9.1)
+ − 848
\begin{tabular}{c}
+ − 849
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
+ − 850
\footnotesize Stone of Rosetta
+ − 851
\end{tabular}
+ − 852
\end{textblock}
+ − 853
+ − 854
\end{frame}
+ − 855
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 856
+ − 857
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 858
\begin{frame}[c]
+ − 859
\frametitle{Lectures 5 - 10}
+ − 860
+ − 861
code generation for a small imperative and a small functional language\\[10mm]
+ − 862
+ − 863
{\LARGE\bf Interpreters}\medskip\\
+ − 864
\hspace{5mm}(directly runs a program)\\[6mm]
+ − 865
+ − 866
{\LARGE\bf Compilers}\medskip\\
+ − 867
\hspace{5mm}(generate JVM code and LLVM-IR code)
+ − 868
+ − 869
\begin{textblock}{1}(8.8,8.1)
+ − 870
\begin{tabular}{c@{}c}
+ − 871
\includegraphics[scale=0.4]{../pics/javaduke.png} &
+ − 872
\includegraphics[scale=0.23]{../pics/llvmlogo.png}
+ − 873
\end{tabular}
+ − 874
\end{textblock}
+ − 875
+ − 876
\end{frame}
+ − 877
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 878
+ − 879
+ − 880
+ − 881
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 882
\begin{frame}[t]
+ − 883
\frametitle{Familiar Regular Expresssions}
+ − 884
\small
+ − 885
\begin{center}
+ − 886
\texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
+ − 887
\end{center}\smallskip
+ − 888
+ − 889
\begin{center}
+ − 890
\begin{tabular}{@{}lp{8.5cm}@{}}
+ − 891
\pcode{re*} & matches 0 or more times\\
+ − 892
\pcode{re+} & matches 1 or more times\\
+ − 893
\pcode{re?} & matches 0 or 1 times\\
+ − 894
\pcode{re\{n\}} & matches exactly \pcode{n} number of times\\
+ − 895
\pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\
+ − 896
\pcode{[...]} & matches any single character inside the brackets\\
+ − 897
\pcode{[^...]} & matches any single character not inside the
+ − 898
brackets\\
+ − 899
\pcode{a-z A-Z} & character ranges\\
+ − 900
\pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
+ − 901
\pcode{.} & matches every character except newline\\
+ − 902
\pcode{(re)} & groups regular expressions and remembers
+ − 903
the matched text
+ − 904
\end{tabular}
+ − 905
\end{center}
+ − 906
+ − 907
\end{frame}
+ − 908
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
879
+ − 909
{
+ − 910
\setbeamercolor{background canvas}{bg=cream}
+ − 911
\begin{frame}[t]
+ − 912
\frametitle{Notation for REs}
+ − 913
+ − 914
\end{frame}
+ − 915
}
876
+ − 916
931
+ − 917
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 918
{
+ − 919
\setbeamercolor{background canvas}{bg=cream}
+ − 920
\begin{frame}[t]
+ − 921
+ − 922
\end{frame}
+ − 923
}
+ − 924
+ − 925
+ − 926
876
+ − 927
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 928
\begin{frame}[c]
+ − 929
\frametitle{Some ``innocent'' examples}
+ − 930
+ − 931
Let's try two examples
+ − 932
+ − 933
\begin{center}
+ − 934
\bl{\texttt{(a*)*b}}
+ − 935
\hspace{2cm}
+ − 936
\bl{\texttt{[a?]\{n\}[a]\{n\}}}
+ − 937
\end{center}\bigskip\pause
+ − 938
+ − 939
and match them with strings of the form
+ − 940
+ − 941
\begin{center}
+ − 942
\bl{\texttt{a}},
+ − 943
\bl{\texttt{aa}},
+ − 944
\bl{\texttt{aaa}},
+ − 945
\bl{\texttt{aaaa}},
+ − 946
\bl{\texttt{aaaaa}},
+ − 947
\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
+ − 948
\end{center}
+ − 949
+ − 950
\end{frame}
+ − 951
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 952
+ − 953
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 954
\begin{frame}[c]
+ − 955
\frametitle{Why Bother with Regexes?}
+ − 956
+ − 957
\begin{columns}[t,onlytextwidth]
+ − 958
\begin{column}{1.8cm}
+ − 959
\mbox{}
+ − 960
\end{column}
+ − 961
\begin{column}{.5\textwidth}
+ − 962
\small{}Ruby, Python, Java 8\medskip\\
+ − 963
\begin{tikzpicture}\footnotesize
+ − 964
\begin{axis}[
+ − 965
xlabel={$n$},
+ − 966
x label style={at={(1.05,0.0)}},
+ − 967
ylabel={time in secs},
+ − 968
enlargelimits=false,
+ − 969
xtick={0,5,...,30},
+ − 970
xmax=33,
+ − 971
ymax=35,
+ − 972
ytick={0,5,...,30},
+ − 973
scaled ticks=false,
+ − 974
axis lines=left,
+ − 975
width=\textwidth,
+ − 976
height=4cm,
+ − 977
legend entries={Python,Ruby},
+ − 978
legend pos=north west,
+ − 979
legend cell align=left]
+ − 980
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
+ − 981
\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
+ − 982
\end{axis}
+ − 983
\end{tikzpicture}
+ − 984
\begin{tikzpicture}\footnotesize
+ − 985
\begin{axis}[
+ − 986
xlabel={$n$},
+ − 987
x label style={at={(1.05,0.0)}},
+ − 988
ylabel={time in secs},
+ − 989
enlargelimits=false,
+ − 990
xtick={0,5,...,30},
+ − 991
xmax=33,
+ − 992
ymax=35,
+ − 993
ytick={0,5,...,30},
+ − 994
scaled ticks=false,
+ − 995
axis lines=left,
+ − 996
width=\textwidth,
+ − 997
height=4cm,
+ − 998
legend entries={Python, Java 8, JavaScript, Swift},
+ − 999
legend pos=north west,
+ − 1000
legend cell align=left]
+ − 1001
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+ − 1002
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+ − 1003
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
+ − 1004
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
+ − 1005
\end{axis}
+ − 1006
\end{tikzpicture}
+ − 1007
%
+ − 1008
\end{column}
+ − 1009
\begin{column}{.5\textwidth}
+ − 1010
\small{}Us (after next lecture)\medskip\\
+ − 1011
\begin{tikzpicture}\footnotesize
+ − 1012
\begin{axis}[
+ − 1013
xlabel={$n$},
+ − 1014
x label style={at={(1.07,0.0)}},
+ − 1015
ylabel={time in secs},
+ − 1016
enlargelimits=false,
+ − 1017
xtick={0,5000,...,10000},
+ − 1018
xmax=11000,
+ − 1019
ymax=35,
+ − 1020
ytick={0,5,...,30},
+ − 1021
scaled ticks=false,
+ − 1022
axis lines=left,
+ − 1023
width=\textwidth,
+ − 1024
height=4cm]
+ − 1025
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
+ − 1026
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
+ − 1027
\end{axis}
+ − 1028
\end{tikzpicture}
+ − 1029
\begin{tikzpicture}\footnotesize
+ − 1030
\begin{axis}[
+ − 1031
xlabel={$n$},
+ − 1032
x label style={at={(1.07,0.0)}},
+ − 1033
ylabel={time in secs},
+ − 1034
enlargelimits=false,
+ − 1035
ymax=35,
+ − 1036
ytick={0,5,...,30},
+ − 1037
scaled ticks=false,
+ − 1038
axis lines=left,
+ − 1039
width=\textwidth,
+ − 1040
height=4cm]
+ − 1041
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
+ − 1042
\end{axis}
+ − 1043
\end{tikzpicture}
+ − 1044
\end{column}
+ − 1045
\end{columns}
+ − 1046
\medskip
+ − 1047
+ − 1048
\begin{textblock}{3}(-0.1,3.3)
+ − 1049
\small\hfill\bl{\texttt{[a?]\{n\}[a]\{n\}}}:
+ − 1050
\end{textblock}
+ − 1051
+ − 1052
\begin{textblock}{3}(-0.1,8.7)
+ − 1053
\small\hfill\bl{\texttt{(a*)*b}}:
+ − 1054
\end{textblock}
+ − 1055
+ − 1056
\begin{textblock}{3}(0.3,13)
+ − 1057
\small{}matching with strings
+ − 1058
\bl{$\underbrace{\texttt{a}...\texttt{a}}_n$}
+ − 1059
\end{textblock}
+ − 1060
+ − 1061
\end{frame}
+ − 1062
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1063
+ − 1064
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1065
\begin{frame}[c,fragile]
+ − 1066
\frametitle{Incidents}
+ − 1067
+ − 1068
\begin{itemize}
+ − 1069
\item a global outage on 2 July 2019 at \textbf{Cloudflare}
+ − 1070
(first one for six years)\medskip
+ − 1071
+ − 1072
\begin{center}\small\color{blue}
+ − 1073
\begin{verbatim}
+ − 1074
(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|
+ − 1075
null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s
+ − 1076
|-|~|!|{}|\|\||\+)*.*(?:.*=.*)))
+ − 1077
\end{verbatim}
+ − 1078
\end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip
+ − 1079
+ − 1080
\item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
+ − 1081
because of an evil regular expression
+ − 1082
\here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}
+ − 1083
\end{itemize}
+ − 1084
+ − 1085
\begin{textblock}{6}(6,7.6)
+ − 1086
\includegraphics[scale=0.14]{../pics/cloudflare.png}\\
+ − 1087
\footnotesize
+ − 1088
It serves more web traffic than Twitter, Amazon, Apple,
+ − 1089
Instagram, Bing \& Wikipedia combined.
+ − 1090
\here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}
+ − 1091
\end{textblock}
+ − 1092
+ − 1093
\end{frame}
+ − 1094
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1095
+ − 1096
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1097
\begin{frame}[c]
+ − 1098
\frametitle{Evil Regular Expressions}
+ − 1099
+ − 1100
\begin{itemize}
+ − 1101
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
+ − 1102
\item Some evil regular expressions:\medskip
+ − 1103
\begin{itemize}
+ − 1104
\item \bl{\texttt{[a?]\{n\}\;[a]\{n\}}}
+ − 1105
\item \bl{\texttt{(a*)*\;b}}
+ − 1106
\item \bl{\texttt{([a-z]+)*}}
+ − 1107
\item \bl{\texttt{(a + aa)*}}
+ − 1108
\item \bl{\texttt{(a + a?)*}}
+ − 1109
\end{itemize}
+ − 1110
+ − 1111
\item sometimes also called \alert{catastrophic backtracking}
+ − 1112
\item this is a problem for \alert{N}etwork \alert{I}ntrusion
+ − 1113
\alert{D}etection systems, Cloudflare, StackExchange, Atom editor
+ − 1114
\item \url{https://vimeo.com/112065252}
+ − 1115
\end{itemize}
+ − 1116
+ − 1117
\end{frame}
+ − 1118
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1119
930
+ − 1120
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1121
{
+ − 1122
\setbeamercolor{background canvas}{bg=cream}
+ − 1123
\begin{frame}[c]
+ − 1124
\frametitle{Rust vs.~Scala (from PEP)}
+ − 1125
+ − 1126
\mbox{}
+ − 1127
+ − 1128
\begin{minipage}{1.3\textwidth}
+ − 1129
\begin{mybox3}{}\it\small
+ − 1130
\textbf{Re: Another question of purely academic interest about regex implementation in cw3}
+ − 1131
+ − 1132
This conversation is interesting to me, and I've researched it a
+ − 1133
little bit [...] I also disagree with Dr.~Urban on the cost/benefit of
+ − 1134
non-GC languages [...]\smallskip
+ − 1135
+ − 1136
But regardless, Scala is a lot slower than, say, C or Rust. To say
+ − 1137
it's not is basically wrong (imo). Perhaps one could argue that some
+ − 1138
of the guarantees Scala has makes it easier to write multi-threaded
+ − 1139
programs that utilise more of the CPU... but, in my opinion, this is
+ − 1140
also a bit misleading. Most CPUs have something like 4 to 12 cores
+ − 1141
nowadays. It's very possible that a given Scala program runs 4-12x
+ − 1142
slower than its Rust equivalent. Would you rather have your program
+ − 1143
run quickly and use a single core, or have it run equally
+ − 1144
quickly... and... hog your entire CPU for its duration?\ldots{}
+ − 1145
+ − 1146
\mbox{}\hfill-- Oliver Iliffe, discussion from PEP
+ − 1147
\end{mybox3}
+ − 1148
\end{minipage}
+ − 1149
+ − 1150
\end{frame}
+ − 1151
}
+ − 1152
+ − 1153
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1154
{
+ − 1155
\setbeamercolor{background canvas}{bg=cream}
+ − 1156
\begin{frame}[c]
+ − 1157
\frametitle{Regex Lib in Rust}
+ − 1158
+ − 1159
\begin{center}
+ − 1160
\includegraphics[scale=0.34]{../pics/rust-regex.png}
+ − 1161
\end{center}
+ − 1162
+ − 1163
\end{frame}
+ − 1164
}
+ − 1165
+ − 1166
+ − 1167
+ − 1168
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1169
{
+ − 1170
\setbeamercolor{background canvas}{bg=cream}
+ − 1171
\begin{frame}[c,fragile]
+ − 1172
+ − 1173
\begin{columns}[t,onlytextwidth]
+ − 1174
\begin{column}{1\textwidth}
+ − 1175
\small re: \bl{$(abc)^{\{n\}}$}\quad str: \bl{$\underbrace{abc\ldots{}abc}_n$}\medskip\\
+ − 1176
\begin{tikzpicture}\footnotesize
+ − 1177
\begin{axis}[
+ − 1178
xlabel={$n$},
+ − 1179
x label style={at={(1.07,0.0)}},
+ − 1180
ylabel={time in secs},
+ − 1181
enlargelimits=false,
+ − 1182
xmax=65000,
+ − 1183
ymax=100,
+ − 1184
xtick={0,15000,...,60000},
+ − 1185
ytick={0,10,...,90},
+ − 1186
scaled ticks=false,
+ − 1187
axis lines=left,
+ − 1188
width=7cm,
+ − 1189
height=5cm]
+ − 1190
\addplot[black,mark=square*,mark options={fill=red}] table [x=x, y=y, col sep=comma, row sep=crcr]
+ − 1191
{x, y\\
+ − 1192
0, 0\\
+ − 1193
5000, 0.487\\
+ − 1194
10000, 1.650\\
+ − 1195
15000, 3.617\\
+ − 1196
20000, 6.462\\
+ − 1197
25000, 10.736\\
+ − 1198
30000, 17.665\\
+ − 1199
35000, 25.662\\
+ − 1200
40000, 36.422\\
+ − 1201
45000, 49.119\\
+ − 1202
50000, 62.058\\
+ − 1203
55000, 75.941\\
+ − 1204
60000, 93.022\\
+ − 1205
};
+ − 1206
\end{axis}
+ − 1207
\end{tikzpicture}
+ − 1208
\end{column}
+ − 1209
\end{columns}
+ − 1210
+ − 1211
\begin{textblock}{10}(8.4,3.8)
+ − 1212
\tiny
+ − 1213
\begin{lstlisting}[language=Rust]
+ − 1214
extern crate regex;
+ − 1215
+ − 1216
use regex::Regex;
+ − 1217
use std::time::Instant;
+ − 1218
+ − 1219
// bounded regular expression example
+ − 1220
+ − 1221
fn main() {
+ − 1222
for bound in (0..=60000).step_by(5000) {
+ − 1223
+ − 1224
let re = Regex::new(&format!("(abc){{{}}}", bound)).unwrap();
+ − 1225
let text = "abc".repeat(bound);
+ − 1226
+ − 1227
let start_time = Instant::now();
+ − 1228
let is_match = re.is_match(&text);
+ − 1229
let elapsed_time = start_time.elapsed().as_secs_f64();
+ − 1230
+ − 1231
println!("Bound: {}, Match: {}, Time: {} seconds", bound, is_match, elapsed_time);
+ − 1232
}
+ − 1233
}
+ − 1234
\end{lstlisting}
+ − 1235
\end{textblock}
+ − 1236
\end{frame}
+ − 1237
}
+ − 1238
+ − 1239
876
+ − 1240
+ − 1241
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1242
%\begin{frame}[c]
+ − 1243
%\frametitle{Today}
+ − 1244
%
+ − 1245
%\begin{itemize}
+ − 1246
%\item While the ultimate goal is to implement a small compiler for the JVM
+ − 1247
% \ldots\bigskip
+ − 1248
%\end{itemize}
+ − 1249
%
+ − 1250
%Let's start with:
+ − 1251
%
+ − 1252
%\begin{itemize}
+ − 1253
%\item a web-crawler
+ − 1254
%\item an email harvester
+ − 1255
%\item \textcolor{gray}{(a web-scraper)}
+ − 1256
%\end{itemize}
+ − 1257
%
+ − 1258
%\end{frame}
+ − 1259
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1260
+ − 1261
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1262
%\begin{frame}[t]
+ − 1263
%\frametitle{A Web-Crawler}
+ − 1264
%
+ − 1265
%\mbox{}\\[10mm]
+ − 1266
%
+ − 1267
%\begin{enumerate}
+ − 1268
%\item given an URL, read the corresponding webpage
+ − 1269
%\item extract all links from it
+ − 1270
%\item call the web-crawler again for all these links
+ − 1271
%\end{enumerate}
+ − 1272
%
+ − 1273
%\end{frame}
+ − 1274
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1275
+ − 1276
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1277
%\begin{frame}[t]
+ − 1278
%\frametitle{A Web-Crawler}
+ − 1279
%
+ − 1280
%\mbox{}\\[10mm]
+ − 1281
%
+ − 1282
%
+ − 1283
%\begin{enumerate}
+ − 1284
%\item given an URL, read the corresponding webpage
+ − 1285
%\item if not possible print, out a problem
+ − 1286
%\item if possible, extract all links from it
+ − 1287
%\item call the web-crawler again for all these links
+ − 1288
%\end{enumerate}\bigskip\pause
+ − 1289
%
+ − 1290
%\small (we need a bound for the number of recursive calls)
+ − 1291
%
+ − 1292
%\small (the purpose is to check all links on my own webpage)
+ − 1293
%\end{frame}
+ − 1294
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1295
+ − 1296
+ − 1297
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1298
%\begin{frame}[c]
+ − 1299
%
+ − 1300
%\begin{textblock}{1}(2,5)
+ − 1301
%\begin{tabular}{c}
+ − 1302
%\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm]
+ − 1303
%\small Server
+ − 1304
%\end{tabular}
+ − 1305
%\end{textblock}
+ − 1306
%
+ − 1307
%\begin{textblock}{1}(5.6,4)
+ − 1308
% \begin{tikzpicture}[scale=1.1]
+ − 1309
% \draw[white] (0,1) node (X) {};
+ − 1310
% \draw[white] (2,1) node (Y) {};
+ − 1311
% \draw[white] (0,0) node (X1) {};
+ − 1312
% \draw[white] (2,0) node (Y1) {};
+ − 1313
% \draw[white] (0,-1) node (X2) {};
+ − 1314
% \draw[white] (2,-1) node (Y2) {};
+ − 1315
% \draw[red, <-, line width = 2mm] (X) -- (Y);
+ − 1316
% \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
+ − 1317
% \draw[red, ->, line width = 2mm] (X1) -- (Y1);
+ − 1318
% \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
+ − 1319
% \draw[red, <-, line width = 2mm] (X2) -- (Y2);
+ − 1320
% \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
+ − 1321
% \end{tikzpicture}
+ − 1322
%\end{textblock}
+ − 1323
%
+ − 1324
%
+ − 1325
%\begin{textblock}{1}(9,5.5)
+ − 1326
%\begin{tabular}{c}
+ − 1327
%\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm]
+ − 1328
%\small Browser
+ − 1329
%\end{tabular}
+ − 1330
%\end{textblock}
+ − 1331
%\end{frame}
+ − 1332
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1333
+ − 1334
+ − 1335
+ − 1336
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1337
%\begin{frame}[c]
+ − 1338
%\frametitle{Scala}
+ − 1339
%
+ − 1340
%\small A simple Scala function for reading webpages:
+ − 1341
%\bigskip
+ − 1342
%
+ − 1343
%\footnotesize
+ − 1344
%\lstinputlisting{../progs/app0.scala}
+ − 1345
%\medskip\pause
+ − 1346
%
+ − 1347
%\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")}
+ − 1348
%\bigskip\medskip\pause
+ − 1349
%
+ − 1350
%
+ − 1351
%\small A slightly more complicated version for handling errors:
+ − 1352
%\smallskip
+ − 1353
%
+ − 1354
%\footnotesize
+ − 1355
%\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala}
+ − 1356
%
+ − 1357
%
+ − 1358
%\end{frame}
+ − 1359
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1360
+ − 1361
+ − 1362
+ − 1363
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1364
%\begin{frame}[t]
+ − 1365
%\frametitle{A Regular Expression}
+ − 1366
%
+ − 1367
%\begin{itemize}
+ − 1368
%\item \ldots{} is a pattern or template for specifying strings
+ − 1369
%\end{itemize}\bigskip
+ − 1370
%
+ − 1371
%\begin{center}
+ − 1372
%\only<1>{\scode{"https?://[^"]*"}}%
+ − 1373
%\only<2>{\scode{""""https?://[^"]*"""".r}}
+ − 1374
%\end{center}\bigskip\bigskip
+ − 1375
%
+ − 1376
%matches for example\smallskip\\
+ − 1377
%\hspace{2mm}\code{"http://www.foobar.com"}\\
+ − 1378
%\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\
+ − 1379
%
+ − 1380
%but not\smallskip\\
+ − 1381
%\hspace{2mm}\code{"http://www."foo"bar.com"}\\
+ − 1382
%
+ − 1383
%\end{frame}
+ − 1384
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1385
+ − 1386
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1387
%\begin{frame}[c]
+ − 1388
%\frametitle{Finding Operations in Scala}
+ − 1389
%
+ − 1390
%{\bf\code{rexp.findAllIn(string)}}\medskip
+ − 1391
%
+ − 1392
%returns a list of all (sub)strings that match the
+ − 1393
%regular expression
+ − 1394
%\bigskip\bigskip
+ − 1395
%
+ − 1396
%
+ − 1397
%{\bf\code{rexp.findFirstIn(string)}}\medskip
+ − 1398
%
+ − 1399
%returns either
+ − 1400
%
+ − 1401
%\begin{itemize}
+ − 1402
%\item \code{None} if no (sub)string matches or
+ − 1403
%\item \code{Some(s)} with the first (sub)string
+ − 1404
%\end{itemize}
+ − 1405
%
+ − 1406
%\end{frame}
+ − 1407
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1408
+ − 1409
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1410
%\begin{frame}[c]
+ − 1411
%
+ − 1412
%\footnotesize
+ − 1413
%\lstinputlisting{../progs/app2.scala}
+ − 1414
%
+ − 1415
%\end{frame}
+ − 1416
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1417
+ − 1418
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1419
%\begin{frame}[c]
+ − 1420
%
+ − 1421
%\small
+ − 1422
%A version that only crawls links in ``my'' domain:\bigskip
+ − 1423
%
+ − 1424
%\footnotesize
+ − 1425
%\lstinputlisting{../progs/app3.scala}
+ − 1426
%
+ − 1427
%\end{frame}
+ − 1428
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1429
+ − 1430
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1431
%\begin{frame}[c]
+ − 1432
%\lstset{xleftmargin=-4mm}
+ − 1433
%\small
+ − 1434
%A little email harvester:
+ − 1435
%
+ − 1436
%\footnotesize
+ − 1437
%\lstinputlisting{../progs/app4.scala}\bigskip
+ − 1438
%
+ − 1439
%\tiny
+ − 1440
%\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}
+ − 1441
%
+ − 1442
%\end{frame}
+ − 1443
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1444
+ − 1445
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1446
\begin{frame}[t]
+ − 1447
\frametitle{(Basic) Regular Expressions}
+ − 1448
+ − 1449
Their inductive definition:
+ − 1450
+ − 1451
+ − 1452
\begin{textblock}{6}(2,7.5)
+ − 1453
\begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
+ − 1454
\bl{$r$} & \bl{$::=$} & \bl{$\ZERO$} & nothing\\
+ − 1455
& \bl{$\mid$} & \bl{$\ONE$} & empty string / \pcode{""} / $[]$\\
+ − 1456
& \bl{$\mid$} & \bl{$c$} & character\\
+ − 1457
& \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
+ − 1458
& \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+ − 1459
& \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
+ − 1460
\end{tabular}
+ − 1461
\end{textblock}
+ − 1462
+ − 1463
+ − 1464
\only<2->{\footnotesize
+ − 1465
\begin{textblock}{9}(2,0.5)
+ − 1466
\begin{bubble}[9.8cm]
+ − 1467
\lstinputlisting{../progs/app01.scala}
+ − 1468
\end{bubble}
+ − 1469
\end{textblock}}
+ − 1470
+ − 1471
\end{frame}
+ − 1472
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1473
+ − 1474
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1475
%\begin{frame}[t]
+ − 1476
%\frametitle{Regular Expressions}
+ − 1477
%
+ − 1478
%\small
+ − 1479
%In Scala:\bigskip
+ − 1480
%
+ − 1481
%\footnotesize
+ − 1482
%\lstinputlisting{../progs/app51.scala}
+ − 1483
%
+ − 1484
%
+ − 1485
%\end{frame}
+ − 1486
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1487
+ − 1488
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1489
\begin{frame}[t]
+ − 1490
\frametitle{Strings}
+ − 1491
+ − 1492
\ldots are lists of characters. For example \code{"hello"}
+ − 1493
+ − 1494
\begin{center}
+ − 1495
\bl{$[h, e, l, l, o]$} or just \bl{$hello$}
+ − 1496
\end{center}
+ − 1497
+ − 1498
the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\
+ − 1499
+ − 1500
the concatenation of two strings:
+ − 1501
+ − 1502
\begin{center}
+ − 1503
\bl{$s_1 \,@\, s_2$}
+ − 1504
\end{center}
+ − 1505
+ − 1506
\bl{\textit{foo $@$ bar = foobar}}\\
+ − 1507
\bl{\textit{baz $@\, []$ = baz}}
+ − 1508
+ − 1509
\end{frame}
+ − 1510
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1511
+ − 1512
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1513
\begin{frame}[c]
+ − 1514
\frametitle{Languages, Strings}
+ − 1515
+ − 1516
\begin{itemize}
+ − 1517
\item \alert{\bf Strings} are lists of characters, for example
+ − 1518
\begin{center}
+ − 1519
\bl{$[]$},\;\bl{$abc$} \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
+ − 1520
\end{center}\bigskip
+ − 1521
+ − 1522
+ − 1523
\item A \alert{\bf language} is a set of strings, for example\medskip
+ − 1524
\begin{center}
+ − 1525
\bl{$\{[], hello, \textit{foobar}, a, abc\}$}
+ − 1526
\end{center}\bigskip
+ − 1527
+ − 1528
\item \alert{\bf Concatenation} of strings and languages
+ − 1529
+ − 1530
\begin{center}
+ − 1531
\begin{tabular}{rcl}
+ − 1532
\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
+ − 1533
\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
+ − 1534
\end{tabular}
+ − 1535
\end{center}
+ − 1536
+ − 1537
%\item The \alert{\bf meaning} of a regular expression is a set of
+ − 1538
% strings, or language.
+ − 1539
\end{itemize}
+ − 1540
+ − 1541
\only<2>{
+ − 1542
\begin{textblock}{4}(10.5,8)
+ − 1543
\small
+ − 1544
Let
+ − 1545
+ − 1546
\bl{$A = \{foo, bar\}$} \bl{$B = \{a, b\}$}
+ − 1547
\[
+ − 1548
\bl{A \,@\, B = \{fooa, foob, bara, barb\}}
+ − 1549
\]
+ − 1550
\end{textblock}}
+ − 1551
+ − 1552
\end{frame}
+ − 1553
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1554
+ − 1555
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1556
\begin{frame}[c]
+ − 1557
\frametitle{Two Corner Cases}
+ − 1558
+ − 1559
\Large
+ − 1560
\begin{center}
+ − 1561
\bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
+ − 1562
\bl{$A \,@\, \{\} = \;?$}
+ − 1563
\end{center}
+ − 1564
+ − 1565
\end{frame}
+ − 1566
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1567
+ − 1568
+ − 1569
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1570
\begin{frame}[c]
+ − 1571
\frametitle{The Meaning of a Regex}
+ − 1572
+ − 1573
...all the strings a regular expression can match.
+ − 1574
+ − 1575
\begin{center}
+ − 1576
\begin{tabular}{rcl}
+ − 1577
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
+ − 1578
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1579
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ − 1580
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ − 1581
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
+ − 1582
\bl{$L(r^*)$} & \bl{$\dn$} & \\
+ − 1583
\end{tabular}
+ − 1584
\end{center}
+ − 1585
+ − 1586
\begin{textblock}{14}(1.5,13.5)\small
+ − 1587
\bl{$L$} is a function from regular expressions to
+ − 1588
sets of strings (languages):\smallskip\\
+ − 1589
\bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
+ − 1590
\end{textblock}
+ − 1591
+ − 1592
\end{frame}
+ − 1593
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1594
+ − 1595
+ − 1596
+ − 1597
+ − 1598
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1599
\begin{frame}[c]
+ − 1600
\frametitle{The Power Operation}
+ − 1601
+ − 1602
\begin{itemize}
+ − 1603
\item The \alert{\textbf{\boldmath$n$th Power}} of a language:
+ − 1604
+ − 1605
\begin{center}
+ − 1606
\begin{tabular}{lcl}
+ − 1607
\bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1608
\bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
+ − 1609
\end{tabular}
+ − 1610
\end{center}\bigskip
+ − 1611
+ − 1612
\item[] For example
+ − 1613
+ − 1614
\begin{center}
+ − 1615
\begin{tabular}{lcl@{\hspace{10mm}}l}
+ − 1616
\bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
+ − 1617
\bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
+ − 1618
\bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
+ − 1619
\end{tabular}
+ − 1620
\end{center}
+ − 1621
+ − 1622
\end{itemize}
+ − 1623
+ − 1624
\end{frame}
+ − 1625
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1626
+ − 1627
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1628
\begin{frame}[c]
+ − 1629
\frametitle{The Meaning of a Regex}
+ − 1630
+ − 1631
\begin{textblock}{15}(1,4)
+ − 1632
\begin{tabular}{rcl}
+ − 1633
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
+ − 1634
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1635
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ − 1636
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ − 1637
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
+ − 1638
\bl{$L(r^*)$} & \bl{$\dn$} & \onslide<2->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
+ − 1639
\end{tabular}\bigskip
+ − 1640
+ − 1641
%\onslide<2->{
+ − 1642
%\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
+ − 1643
%\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
+ − 1644
%\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
+ − 1645
%}
+ − 1646
\end{textblock}
+ − 1647
+ − 1648
\end{frame}
+ − 1649
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1650
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1651
\begin{frame}[c]
+ − 1652
\frametitle{The Star Operation}
+ − 1653
+ − 1654
\begin{itemize}
+ − 1655
\item The \alert{\bf Kleene Star} of a \underline{language}:
+ − 1656
\bigskip
+ − 1657
+ − 1658
\begin{center}
+ − 1659
\begin{tabular}{c}
+ − 1660
\bl{$A\star \dn \bigcup_{0\le n} A^n$}
+ − 1661
\end{tabular}
+ − 1662
\end{center}\bigskip
+ − 1663
+ − 1664
\item[] This expands to
+ − 1665
+ − 1666
\[
+ − 1667
\bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots}
+ − 1668
\]
+ − 1669
+ − 1670
or
+ − 1671
+ − 1672
\small
+ − 1673
\[
+ − 1674
\bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\;
+ − 1675
A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots}
+ − 1676
\]
+ − 1677
+ − 1678
\end{itemize}
+ − 1679
+ − 1680
\end{frame}
+ − 1681
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1682
+ − 1683
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1684
\begin{frame}[c]
+ − 1685
\frametitle{The Meaning of a Regex}
+ − 1686
+ − 1687
\begin{textblock}{15}(1,4)
+ − 1688
\begin{tabular}{rcl}
+ − 1689
\bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
+ − 1690
\bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1691
\bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ − 1692
\bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ − 1693
\bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
+ − 1694
\bl{$L(r^*)$} & \bl{$\dn$} & \bl{$(L(r))\star$}\\
+ − 1695
\end{tabular}
+ − 1696
+ − 1697
\end{textblock}
+ − 1698
+ − 1699
\end{frame}
+ − 1700
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1701
+ − 1702
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1703
\begin{frame}[c]
+ − 1704
\frametitle{The Meaning of Matching}
+ − 1705
+ − 1706
\begin{bubble}[10cm]
+ − 1707
\large\bf
+ − 1708
A regular expression \bl{$r$} matches a string~\bl{$s$}
+ − 1709
provided
+ − 1710
+ − 1711
\begin{center}
+ − 1712
\bl{$s \in L(r)$}\\
+ − 1713
\end{center}
+ − 1714
\end{bubble}\bigskip\bigskip
+ − 1715
+ − 1716
\ldots and the point of the next lecture is
+ − 1717
to decide this problem as fast as possible (unlike Python,
+ − 1718
Ruby, Java)
+ − 1719
+ − 1720
\end{frame}
879
+ − 1721
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1722
+ − 1723
876
+ − 1724
+ − 1725
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1726
\begin{frame}[c]
+ − 1727
\frametitle{Questions}
+ − 1728
+ − 1729
\begin{itemize}
+ − 1730
\item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip
+ − 1731
+ − 1732
\item[]
+ − 1733
How many strings are in \bl{$A^4$}\,?
+ − 1734
\bigskip\medskip\pause
+ − 1735
+ − 1736
+ − 1737
\item[]
+ − 1738
What if \bl{$A = \{[a],[b],[c],[]\}$};\\
+ − 1739
how many strings are then in \bl{$A^4$}\,?
+ − 1740
\end{itemize}
+ − 1741
+ − 1742
\end{frame}
+ − 1743
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1744
+ − 1745
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
879
+ − 1746
\begin{frame}[c]
+ − 1747
\frametitle{Questions}
+ − 1748
+ − 1749
\begin{itemize}
+ − 1750
\item Assume a set $A$ contains 4 strings and a set $B$
+ − 1751
contains 7 strings. None of the strings is the empty
+ − 1752
string.
+ − 1753
+ − 1754
\item How many strings are in $A \,@\, B$?
+ − 1755
\end{itemize}
+ − 1756
+ − 1757
+ − 1758
\end{frame}
+ − 1759
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1760
930
+ − 1761
{
+ − 1762
\setbeamercolor{background canvas}{bg=cream}
+ − 1763
\begin{frame}[c]
+ − 1764
+ − 1765
\begin{minipage}{1.3\textwidth}
+ − 1766
+ − 1767
\end{minipage}
+ − 1768
\includegraphics[scale=0.4]{../pics/chatgpt.png}
+ − 1769
\end{frame}
+ − 1770
}
+ − 1771
+ − 1772
{
+ − 1773
\setbeamercolor{background canvas}{bg=cream}
+ − 1774
\begin{frame}[c]
+ − 1775
\frametitle{\dots{}for amusement}
+ − 1776
+ − 1777
\begin{minipage}{1.3\textwidth}
+ − 1778
\includegraphics[scale=0.4]{../pics/chatgpt1.png}
+ − 1779
\end{minipage}
+ − 1780
+ − 1781
\end{frame}
+ − 1782
}
+ − 1783
879
+ − 1784
+ − 1785
+ − 1786
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
876
+ − 1787
% \begin{frame}[c]
+ − 1788
% \frametitle{Languages (Sets of Strings)}
+ − 1789
+ − 1790
% \begin{itemize}
+ − 1791
+ − 1792
% \item A \alert{\bf Language} is a set of strings, for example\medskip
+ − 1793
% \begin{center}
+ − 1794
% \bl{$\{[], hello, foobar, a, abc\}$}
+ − 1795
% \end{center}\bigskip
+ − 1796
+ − 1797
% \item \alert{\bf Concatenation} for strings and languages
+ − 1798
+ − 1799
% \begin{center}
+ − 1800
% \begin{tabular}{rcl}
+ − 1801
% \bl{$foo\;@\;bar$} & \bl{$=$} & \bl{$foobar$}\medskip\\
+ − 1802
% \bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
+ − 1803
% \end{tabular}
+ − 1804
% \end{center}
+ − 1805
% \bigskip
+ − 1806
+ − 1807
% \small
+ − 1808
% \item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}
+ − 1809
+ − 1810
% \[
+ − 1811
% \bl{A \,@\, B = \{fooa, foob, bara, barb\}}
+ − 1812
% \]
+ − 1813
+ − 1814
+ − 1815
+ − 1816
+ − 1817
% \end{itemize}
+ − 1818
% \end{frame}
+ − 1819
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1820
+ − 1821
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1822
% \begin{frame}[c]
+ − 1823
% \frametitle{Two Corner Cases}
+ − 1824
+ − 1825
% \Large
+ − 1826
% \begin{center}
+ − 1827
% \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
+ − 1828
% \bl{$A \,@\, \{\} = \;?$}
+ − 1829
% \end{center}
+ − 1830
+ − 1831
% \end{frame}
+ − 1832
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1833
+ − 1834
+ − 1835
+ − 1836
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1837
% \begin{frame}[c]
+ − 1838
% \frametitle{The Meaning of a Regex}
+ − 1839
+ − 1840
% ...all the strings a regular expression can match.
+ − 1841
+ − 1842
% \begin{center}
+ − 1843
% \begin{tabular}{rcl}
+ − 1844
% \bl{$L(\ZERO)$} & \bl{$\dn$} & \bl{$\{\}$}\\
+ − 1845
% \bl{$L(\ONE)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1846
% \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ − 1847
% \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ − 1848
% \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
+ − 1849
% \bl{$L(r^*)$} & \bl{$\dn$} & \\
+ − 1850
% \end{tabular}
+ − 1851
% \end{center}
+ − 1852
+ − 1853
% \begin{textblock}{14}(1.5,13.5)\small
+ − 1854
% \bl{$L$} is a function from regular expressions to
+ − 1855
% sets of strings (languages):\smallskip\\
+ − 1856
% \bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
+ − 1857
% \end{textblock}
+ − 1858
+ − 1859
% \end{frame}
+ − 1860
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1861
+ − 1862
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1863
% \begin{frame}[c]
+ − 1864
% \frametitle{The Power Operation}
+ − 1865
+ − 1866
% \begin{itemize}
+ − 1867
% \item The \alert{\textbf{\boldmath$n$th Power}} of a language:
+ − 1868
+ − 1869
% \begin{center}
+ − 1870
% \begin{tabular}{lcl}
+ − 1871
% \bl{$A^0$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ − 1872
% \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
+ − 1873
% \end{tabular}
+ − 1874
% \end{center}\bigskip
+ − 1875
+ − 1876
% \item[] For example
+ − 1877
+ − 1878
% \begin{center}
+ − 1879
% \begin{tabular}{lcl@{\hspace{10mm}}l}
+ − 1880
% \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
+ − 1881
% \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
+ − 1882
% \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
+ − 1883
% \end{tabular}
+ − 1884
% \end{center}
+ − 1885
+ − 1886
% \end{itemize}
+ − 1887
+ − 1888
% \end{frame}
+ − 1889
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1890
+ − 1891
+ − 1892
+ − 1893
+ − 1894
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1895
% \begin{frame}[c]
+ − 1896
% \frametitle{Written Exam}
+ − 1897
+ − 1898
% \begin{itemize}
+ − 1899
% \item Accounts for 80\%.\bigskip
+ − 1900
+ − 1901
% \item The question ``\textit{Is this relevant for
+ − 1902
% the exam?}'' is very demotivating for the lecturer!\bigskip\\
+ − 1903
+ − 1904
% \item Deal: Whatever is in the homework (and is not marked
+ − 1905
% ``\textit{optional}'') is relevant for the exam.\bigskip
+ − 1906
+ − 1907
% \item Each lecture has also a handout. There are also handouts about
+ − 1908
% notation and Scala.
+ − 1909
% \end{itemize}
+ − 1910
+ − 1911
% \end{frame}
+ − 1912
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1913
+ − 1914
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1915
% \begin{frame}[t]
+ − 1916
% \frametitle{Coursework}
+ − 1917
+ − 1918
% \begin{itemize}
+ − 1919
% \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
+ − 1920
% \end{itemize}
+ − 1921
+ − 1922
% \begin{columns}[t]
+ − 1923
% \begin{column}{.5\textwidth}
+ − 1924
% \underline{\bf Strand 1}\medskip
+ − 1925
% \begin{itemize}
+ − 1926
% \item 4 programming tasks:
+ − 1927
% \begin{itemize}
+ − 1928
% \item matcher (4\%, 11.10.)
+ − 1929
% \item lexer (5\%, 04.11.)
+ − 1930
% \item parser (5\%, 22.11.)
+ − 1931
% \item compiler (6\%, 13.12.)
+ − 1932
% \end{itemize}
+ − 1933
% \item in any lang.~you like,\\ but I want to see the\\ code
+ − 1934
% \end{itemize}
+ − 1935
% \end{column}
+ − 1936
+ − 1937
% \hspace{-45pt}\vrule{}\hspace{10pt}
+ − 1938
% \begin{column}{.5\textwidth}
+ − 1939
% \underline{\bf Strand 2}\smallskip\begin{itemize}
+ − 1940
% \item one task: prove the correctness of a regular expression matcher in
+ − 1941
% the \underline{Isabelle} theorem prover
+ − 1942
% \item 20\%, submission on~13.12.\hspace{-5mm}\mbox{}
+ − 1943
% \end{itemize}
+ − 1944
% \end{column}
+ − 1945
% \end{columns}\medskip
+ − 1946
+ − 1947
% \small
+ − 1948
% \begin{itemize}
+ − 1949
% \item Solving more than one strand will {\bf not} give you more
+ − 1950
% marks.
+ − 1951
+ − 1952
% \end{itemize}
+ − 1953
+ − 1954
% \end{frame}
+ − 1955
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1956
+ − 1957
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1958
%\begin{frame}[c]
+ − 1959
%\frametitle{Lecture Capture}
+ − 1960
%
+ − 1961
%\begin{itemize}
+ − 1962
%\item Hope it works\ldots\pause actually no, it does not!\medskip\pause
+ − 1963
%\item It is important to use lecture capture wisely\\ (it is only the ``baseline''):
+ − 1964
%\begin{itemize}
+ − 1965
%\item Lecture recordings are a study and revision aid.
+ − 1966
%\item Statistically, there is a clear and direct link between attendance and
+ − 1967
% attainment: students who do not attend lectures, do less well in exams.
+ − 1968
%\end{itemize}
+ − 1969
%
+ − 1970
%\item Attending a lecture is more than watching it online -- if you do not
+ − 1971
%attend, you miss out!
+ − 1972
%
+ − 1973
%\end{itemize}
+ − 1974
%
+ − 1975
%\end{frame}
+ − 1976
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1977
+ − 1978
+ − 1979
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 1980
\begin{frame}[c]
929
+ − 1981
\frametitle{\begin{tabular}{c}\\[1cm]\alert{Questions?}\end{tabular}}
876
+ − 1982
+ − 1983
+ − 1984
\begin{tabular}{lll}
922
+ − 1985
SGT TAs: & Flavio Melinte Citea & (was a KURF last summer)\\
+ − 1986
& Krishi Wali \\
+ − 1987
& Meilai Ji \medskip\\
929
+ − 1988
Amm Helpers & Harry Dilnot & (harry.dilnot@kcl.ac.uk)\\
+ − 1989
& Meilai Ji & (meilai.ji@kcl.ac.uk)\medskip\\
876
+ − 1990
\end{tabular}
+ − 1991
\mbox{}
+ − 1992
\end{frame}
+ − 1993
+ − 1994
\begin{frame}[c]
+ − 1995
\end{frame}
+ − 1996
+ − 1997
\begin{frame}[c]
+ − 1998
\end{frame}
+ − 1999
+ − 2000
\begin{frame}[c]
+ − 2001
\end{frame}
+ − 2002
+ − 2003
\begin{frame}[c]
+ − 2004
\end{frame}
+ − 2005
+ − 2006
\begin{frame}[c]
+ − 2007
\end{frame}
+ − 2008
+ − 2009
\begin{frame}[c]
+ − 2010
\end{frame}
+ − 2011
+ − 2012
\begin{frame}[c]
+ − 2013
\end{frame}
+ − 2014
+ − 2015
\begin{frame}[c]
+ − 2016
\end{frame}
+ − 2017
+ − 2018
\begin{frame}[c]
+ − 2019
\end{frame}
+ − 2020
+ − 2021
\begin{frame}[c]
+ − 2022
\end{frame}
+ − 2023
+ − 2024
\begin{frame}[c]
+ − 2025
\end{frame}
+ − 2026
+ − 2027
\begin{frame}[c]
+ − 2028
\end{frame}
+ − 2029
+ − 2030
\begin{frame}[c]
+ − 2031
\end{frame}
+ − 2032
+ − 2033
\begin{frame}[c]
+ − 2034
\end{frame}
+ − 2035
+ − 2036
+ − 2037
\begin{frame}[c]
+ − 2038
\begin{mybox3}{Coursework}
+ − 2039
Do we need to provide instructions on running the coursework files
+ − 2040
if we're using languages other than Scala? Thanks
+ − 2041
\end{mybox3}\pause
+ − 2042
+ − 2043
\begin{mybox2}{Zip-File for Coursework}
+ − 2044
Please, please submit a zipfile that generates a subdirectory
+ − 2045
\begin{center}
+ − 2046
\texttt{NameFamilyName}
+ − 2047
\end{center}
+ − 2048
\end{mybox2}
+ − 2049
\end{frame}
+ − 2050
+ − 2051
+ − 2052
\begin{frame}[c]
+ − 2053
\begin{mybox3}{What is the trick?}\small
+ − 2054
What was the trick to improve the evil regular expressions matcher
+ − 2055
to have such good results compared to other programming languages?
+ − 2056
Is it working better on casual regular expressions (the ones that
+ − 2057
Python and Java handle pretty well), too? Or was it just optimised
+ − 2058
for these evil ones?
+ − 2059
\end{mybox3}
+ − 2060
\end{frame}
+ − 2061
+ − 2062
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2063
\begin{frame}[c]
+ − 2064
\frametitle{Thanks to Martin Mikusovic}
+ − 2065
+ − 2066
\bigskip
+ − 2067
\begin{center}
+ − 2068
\begin{tikzpicture}
+ − 2069
\begin{axis}[
+ − 2070
xlabel={$n$},
+ − 2071
x label style={at={(1.05,0.0)}},
+ − 2072
ylabel={time in secs},
+ − 2073
enlargelimits=false,
+ − 2074
xtick={0,5,...,30},
+ − 2075
xmax=33,
+ − 2076
ymax=35,
+ − 2077
ytick={0,10,...,30},
+ − 2078
scaled ticks=false,
+ − 2079
axis lines=left,
+ − 2080
width=9cm,
+ − 2081
height=5.5cm,
+ − 2082
legend entries={Java 8, Python, JavaScript, Swift},
+ − 2083
legend pos=north west,
+ − 2084
legend cell align=left]
+ − 2085
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
+ − 2086
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
+ − 2087
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
+ − 2088
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
+ − 2089
\end{axis}
+ − 2090
\end{tikzpicture}
+ − 2091
\end{center}
+ − 2092
+ − 2093
Regex: \bl{$(a^*)^* \cdot b$}
+ − 2094
+ − 2095
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
+ − 2096
+ − 2097
\end{frame}
+ − 2098
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2099
+ − 2100
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2101
\begin{frame}[c]
+ − 2102
\frametitle{Same Example in Java 9+}
+ − 2103
+ − 2104
\begin{center}
+ − 2105
\begin{tikzpicture}
+ − 2106
\begin{axis}[
+ − 2107
xlabel={$n$},
+ − 2108
x label style={at={(1.09,-0.15)}},
+ − 2109
ylabel={time in secs},
+ − 2110
scaled x ticks=false,
+ − 2111
enlargelimits=false,
+ − 2112
xtick distance=10000,
+ − 2113
xmax=44000,
+ − 2114
ytick={0,10,...,30},
+ − 2115
ymax=35,
+ − 2116
axis lines=left,
+ − 2117
width=9cm,
+ − 2118
height=5cm,
+ − 2119
legend entries={Java \liningnums{9}+},
+ − 2120
legend pos=north west,
+ − 2121
legend cell align=left]
+ − 2122
\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
+ − 2123
\end{axis}
+ − 2124
\end{tikzpicture}
+ − 2125
\end{center}
+ − 2126
+ − 2127
Regex: \bl{$(a^*)^* \cdot b$}
+ − 2128
+ − 2129
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
+ − 2130
+ − 2131
\end{frame}
+ − 2132
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2133
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2134
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2135
% Questions
+ − 2136
+ − 2137
+ − 2138
+ − 2139
+ − 2140
\begin{frame}[c]
+ − 2141
\end{frame}
+ − 2142
+ − 2143
\begin{frame}[c]
+ − 2144
\end{frame}
+ − 2145
+ − 2146
\begin{frame}[c]
+ − 2147
\end{frame}
+ − 2148
+ − 2149
\begin{frame}[c]
+ − 2150
\end{frame}
+ − 2151
879
+ − 2152
\begin{frame}[c]
876
+ − 2153
+ − 2154
\end{frame}
+ − 2155
+ − 2156
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ − 2157
\end{document}
+ − 2158
+ − 2159
%%% Local Variables:
+ − 2160
%%% mode: latex
+ − 2161
%%% TeX-master: t
+ − 2162
%%% End:
+ − 2163