| author | update | 
| Mon, 05 Oct 2020 17:46:12 +0100 | |
| changeset 774 | a9fcd8afcd6a | 
| parent 769 | b153de5339bc | 
| child 778 | ae85207c6a93 | 
| permissions | -rw-r--r-- | 
| 631 | 1  | 
% !TEX program = xelatex  | 
| 743 | 2  | 
\documentclass[dvipsnames,14pt,t,xelatex,aspectratio=169,xcolor={table}]{beamer}
 | 
| 
252
 
e8ef8f38ca84
added style files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
215 
diff
changeset
 | 
3  | 
\usepackage{../slides}
 | 
| 
 
e8ef8f38ca84
added style files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
215 
diff
changeset
 | 
4  | 
\usepackage{../graphics}
 | 
| 
215
 
828303e8e4af
updated slides
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
98 
diff
changeset
 | 
5  | 
\usepackage{../langs}
 | 
| 
 
828303e8e4af
updated slides
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
98 
diff
changeset
 | 
6  | 
\usepackage{../data}
 | 
| 0 | 7  | 
|
| 754 | 8  | 
\usepackage{tcolorbox}
 | 
9  | 
\newtcolorbox{mybox}{colback=red!5!white,colframe=red!75!black}
 | 
|
10  | 
\newtcolorbox{mybox2}[1]{colback=red!5!white,colframe=red!75!black,fonttitle=\bfseries,title=#1}
 | 
|
11  | 
\newtcolorbox{mybox3}[1]{colback=Cyan!5!white,colframe=Cyan!75!black,fonttitle=\bfseries,title=#1}
 | 
|
12  | 
||
| 743 | 13  | 
|
14  | 
||
| 
252
 
e8ef8f38ca84
added style files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
215 
diff
changeset
 | 
15  | 
\hfuzz=220pt  | 
| 
 
e8ef8f38ca84
added style files
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
215 
diff
changeset
 | 
16  | 
|
| 
254
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
17  | 
\lstset{language=Scala,
 | 
| 
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
18  | 
style=mystyle,  | 
| 
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
19  | 
numbersep=0pt,  | 
| 
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
20  | 
numbers=none,  | 
| 
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
21  | 
xleftmargin=0mm}  | 
| 
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
22  | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
23  | 
\newcommand{\bl}[1]{\textcolor{blue}{#1}}     
 | 
| 559 | 24  | 
|
| 0 | 25  | 
% beamer stuff  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
26  | 
\renewcommand{\slidecaption}{CFL 01, King's College London}
 | 
| 0 | 27  | 
|
| 
721
 
e712943cff71
added slides from Rochester
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
637 
diff
changeset
 | 
28  | 
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect1/main.pdf  | 
| 
 
e712943cff71
added slides from Rochester
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
637 
diff
changeset
 | 
29  | 
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect2/main.pdf  | 
| 
 
e712943cff71
added slides from Rochester
 
Christian Urban <christian.urban@kcl.ac.uk> 
parents: 
637 
diff
changeset
 | 
30  | 
%% https://cs.rit.edu/~hh/teaching/_media/cc18/lectures/lect3/main.pdf  | 
| 0 | 31  | 
|
32  | 
\begin{document}
 | 
|
33  | 
||
| 758 | 34  | 
%\begin{frame}[t]
 | 
35  | 
%\begin{mybox}
 | 
|
36  | 
%A physical explanation the \emph{dynamic matrix}\\
 | 
|
37  | 
%lots of text  | 
|
38  | 
%\end{mybox}
 | 
|
| 743 | 39  | 
|
40  | 
||
| 758 | 41  | 
%\begin{mybox2}{Test}
 | 
42  | 
%A physical explanation the \emph{dynamic matrix}\\
 | 
|
43  | 
%lots of text  | 
|
44  | 
%\end{mybox2}
 | 
|
| 754 | 45  | 
|
| 758 | 46  | 
%\begin{mybox3}{Test}
 | 
47  | 
%A physical explanation the \emph{dynamic matrix}\\
 | 
|
48  | 
%lots of text  | 
|
49  | 
%\end{mybox3}
 | 
|
50  | 
%\end{frame}
 | 
|
| 754 | 51  | 
|
| 0 | 52  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
53  | 
\begin{frame}[t]
 | 
| 510 | 54  | 
\frametitle{%  
 | 
| 0 | 55  | 
  \begin{tabular}{@ {}c@ {}}
 | 
| 1 | 56  | 
\\[-3mm]  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
57  | 
\LARGE Compilers and \\[-1mm]  | 
| 743 | 58  | 
\LARGE Formal Languages\\[-3mm]  | 
| 0 | 59  | 
  \end{tabular}}
 | 
60  | 
||
61  | 
  \begin{center}
 | 
|
| 507 | 62  | 
  %\includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm}
 | 
63  | 
  %\includegraphics[scale=0.31]{pics/ante2.jpg}\\
 | 
|
64  | 
  %\footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)}
 | 
|
| 0 | 65  | 
  \end{center}
 | 
66  | 
||
| 510 | 67  | 
\normalsize  | 
| 0 | 68  | 
  \begin{center}
 | 
69  | 
  \begin{tabular}{ll}
 | 
|
70  | 
Email: & christian.urban at kcl.ac.uk\\  | 
|
| 743 | 71  | 
%Office Hours: & Thursdays 12 -- 14\\  | 
72  | 
%Location: & N7.07 (North Wing, Bush House)\\  | 
|
| 631 | 73  | 
Slides \& Progs: & KEATS\\  | 
| 0 | 74  | 
  \end{tabular}
 | 
75  | 
  \end{center}
 | 
|
76  | 
||
| 743 | 77  | 
  \begin{center}
 | 
78  | 
    \begin{tikzpicture}
 | 
|
79  | 
\node[drop shadow,fill=white,inner sep=0pt]  | 
|
80  | 
      {\footnotesize\rowcolors{1}{capri!10}{white}
 | 
|
81  | 
        \begin{tabular}{|p{4.8cm}|p{4.8cm}|}\hline
 | 
|
82  | 
          \cellcolor{blue!50}
 | 
|
83  | 
1 Introduction, Languages & 6 While-Language \\  | 
|
84  | 
2 Regular Expressions, Derivatives & 7 Compilation, JVM \\  | 
|
85  | 
3 Automata, Regular Languages & 8 Compiling Functional Languages \\  | 
|
86  | 
4 Lexing, Tokenising & 9 Optimisations \\  | 
|
87  | 
5 Grammars, Parsing & 10 LLVM \\ \hline  | 
|
88  | 
        \end{tabular}%
 | 
|
89  | 
};  | 
|
90  | 
    \end{tikzpicture}
 | 
|
91  | 
  \end{center}
 | 
|
92  | 
||
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
93  | 
\end{frame}
 | 
| 559 | 94  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 0 | 95  | 
|
96  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 756 | 97  | 
\begin{frame}<1-12>[c]
 | 
| 744 | 98  | 
\frametitle{The Goal of this Module\ldots}
 | 
99  | 
||
100  | 
\begin{center}
 | 
|
101  | 
  \begin{tikzpicture}[scale=1,
 | 
|
102  | 
                      node/.style={
 | 
|
103  | 
rectangle,rounded corners=3mm,  | 
|
104  | 
very thick,draw=black!50,minimum height=18mm, minimum width=20mm,  | 
|
105  | 
top color=white,bottom color=black!20,drop shadow}]  | 
|
106  | 
||
107  | 
  \node at (3.05, 1.8) {\Large\bf \ldots{} you write a compiler};
 | 
|
108  | 
||
109  | 
  \node (0) at (-2.3,0) {};  
 | 
|
110  | 
\node [above=5mm of 0]  | 
|
111  | 
  {\makebox[0mm]{\footnotesize
 | 
|
112  | 
      \begin{tabular}{@{}l@{}}input\\[-1mm]program\end{tabular}}}; 
 | 
|
113  | 
||
114  | 
  \node (A) at (0,0)  [node] {};
 | 
|
115  | 
  \node [below right] at (A.north west) {lexer};
 | 
|
116  | 
||
117  | 
  \node (B) at (3,0)  [node] {};
 | 
|
118  | 
  \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser};
 | 
|
119  | 
||
120  | 
  \node (C) at (6,0)  [node] {};
 | 
|
121  | 
  \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen};
 | 
|
122  | 
||
123  | 
  \node (1) at (8.4,0) {};
 | 
|
124  | 
\node [above=5mm of 1]  | 
|
125  | 
  {\makebox[0mm]{\footnotesize
 | 
|
126  | 
      \begin{tabular}{@{}r@{}}binary\\[-1mm]code\end{tabular}}};
 | 
|
127  | 
||
128  | 
\draw [->,line width=4mm] (0) -- (A);  | 
|
129  | 
\draw [->,line width=4mm] (A) -- (B);  | 
|
130  | 
\draw [->,line width=4mm] (B) -- (C);  | 
|
131  | 
\draw [->,line width=4mm] (C) -- (1);  | 
|
132  | 
  \end{tikzpicture}
 | 
|
133  | 
  \end{center}
 | 
|
134  | 
||
135  | 
\only<2,3,4>{
 | 
|
136  | 
\begin{textblock}{1}(1,2.1)
 | 
|
137  | 
\begin{bubble}[9.8cm]
 | 
|
138  | 
\normalsize  | 
|
139  | 
lexer input: a string\smallskip\\  | 
|
140  | 
\hspace{5mm}\code{"read(n);"}\medskip\\
 | 
|
141  | 
lexer output: a sequence of tokens\smallskip\\  | 
|
142  | 
\hspace{5mm}\code{key(read) lpar id(n) rpar semi}
 | 
|
143  | 
\end{bubble}
 | 
|
144  | 
\end{textblock}} 
 | 
|
145  | 
||
146  | 
\only<3,4>{
 | 
|
147  | 
\begin{textblock}{1}(6,7.8)
 | 
|
148  | 
\begin{tabular}{c}
 | 
|
149  | 
\includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm]
 | 
|
150  | 
\footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta)  | 
|
151  | 
\end{tabular}
 | 
|
152  | 
\end{textblock}}
 | 
|
153  | 
||
154  | 
\only<4>{
 | 
|
155  | 
\begin{textblock}{1}(0.5,12)\small
 | 
|
156  | 
\begin{tabular}{l@{}c@{}l}
 | 
|
157  | 
  \pcode{if}    & $\;\Rightarrow\;$ & keyword\\
 | 
|
158  | 
  \pcode{iffoo} & $\;\Rightarrow\;$ & identifier\\
 | 
|
159  | 
\end{tabular}  
 | 
|
160  | 
\end{textblock}}
 | 
|
161  | 
||
162  | 
\only<6>{
 | 
|
163  | 
\begin{textblock}{1}(1,1.5)
 | 
|
164  | 
\begin{bubble}[8.5cm]
 | 
|
165  | 
\normalsize  | 
|
166  | 
parser input: a sequence of tokens\smallskip\\  | 
|
167  | 
||
168  | 
{\small\hspace{5mm}\code{key(read) lpar id(n) rpar semi}}\smallskip\\
 | 
|
169  | 
||
170  | 
parser output: an abstract syntax tree\smallskip\\  | 
|
171  | 
\footnotesize  | 
|
172  | 
\hspace{2cm}\begin{tikzpicture}
 | 
|
173  | 
  \node {\code{read}}
 | 
|
174  | 
    child {node {\code{lpar}}}
 | 
|
175  | 
    child {node {\code{n}}}
 | 
|
176  | 
    child {node {\code{rpar}}};
 | 
|
177  | 
\end{tikzpicture}
 | 
|
178  | 
\end{bubble}
 | 
|
179  | 
\end{textblock}}
 | 
|
180  | 
||
181  | 
\only<8,9>{
 | 
|
182  | 
\begin{textblock}{1}(1,1.5)
 | 
|
183  | 
\begin{bubble}[4cm]
 | 
|
184  | 
\normalsize  | 
|
185  | 
code generation:\smallskip\\  | 
|
186  | 
\hspace{5mm}\code{istore 2}\\ 
 | 
|
187  | 
\hspace{5mm}\code{iload 2}\\ 
 | 
|
188  | 
\hspace{5mm}\code{ldc 10}\\
 | 
|
189  | 
\hspace{5mm}\code{isub}\\
 | 
|
190  | 
\hspace{5mm}\code{ifeq Label2}\\ 
 | 
|
191  | 
\hspace{5mm}\code{iload 2}\\
 | 
|
192  | 
\hspace{5mm}\code{...}\\
 | 
|
193  | 
\end{bubble}
 | 
|
194  | 
\end{textblock}}
 | 
|
195  | 
||
196  | 
\only<9>{
 | 
|
197  | 
\begin{textblock}{6}(8.4,7)
 | 
|
198  | 
\begin{bubble}[5cm]
 | 
|
199  | 
\mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm]
 | 
|
200  | 
\begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs,
 | 
|
201  | 
xlabel=n,  | 
|
202  | 
enlargelimits=0.05,  | 
|
203  | 
ybar interval=0.7, legend style=small]  | 
|
204  | 
\addplot file {interpreted2.data};
 | 
|
205  | 
\addplot file {compiled2.data};
 | 
|
206  | 
%\legend{interpreted, compiled}
 | 
|
207  | 
\end{axis}
 | 
|
208  | 
\end{tikzpicture}}
 | 
|
209  | 
\end{bubble}
 | 
|
210  | 
\end{textblock}}
 | 
|
211  | 
||
212  | 
\only<10>{
 | 
|
213  | 
\begin{textblock}{6}(1,3)
 | 
|
214  | 
  \begin{bubble}[11cm]
 | 
|
| 756 | 215  | 
    Compiler explorers, e.g.: \url{https://gcc.godbolt.org} \;\video{https://youtu.be/ysaBmhMEyUg}
 | 
| 744 | 216  | 
  \begin{tikzpicture}[]
 | 
217  | 
  \node (0) at (-2.3,0) {\includegraphics[scale=0.3]{pics/csource.png}};
 | 
|
218  | 
  \node (1) [right=35mm] at (0) {\includegraphics[scale=0.3]{pics/cassmbl.png}}; 
 | 
|
219  | 
\draw [->,line width=4mm, red] (0) -- (1);  | 
|
| 756 | 220  | 
  \node (2) [below=20mm] at (0) {\LARGE\bf source};
 | 
221  | 
  \node (3) [right=40mm] at (2) {\LARGE\bf binary};
 | 
|
222  | 
\draw [->,line width=1mm] (2) -- (3);  | 
|
223  | 
\end{tikzpicture}
 | 
|
224  | 
\end{bubble}
 | 
|
225  | 
||
226  | 
\end{textblock}}
 | 
|
227  | 
\only<11>{
 | 
|
228  | 
\begin{textblock}{6}(1,3)
 | 
|
229  | 
  \begin{bubble}[11cm]
 | 
|
230  | 
    Compiler explorer for Java: \url{https://javap.yawk.at} 
 | 
|
231  | 
  \begin{tikzpicture}[]
 | 
|
232  | 
  \node (0) at (-2.3,0) {\includegraphics[scale=0.4]{pics/jsource.png}};
 | 
|
233  | 
  \node (1) [right=35mm] at (0) {\includegraphics[scale=0.4]{pics/jassmbl.png}}; 
 | 
|
234  | 
\draw [->,line width=4mm, red] (0) -- (1);  | 
|
235  | 
  \node (2) [below=20mm] at (0) {\LARGE\bf source};
 | 
|
236  | 
  \node (3) [right=40mm] at (2) {\LARGE\bf byte code};
 | 
|
| 744 | 237  | 
\draw [->,line width=1mm] (2) -- (3);  | 
238  | 
\end{tikzpicture}
 | 
|
239  | 
\end{bubble}
 | 
|
240  | 
\end{textblock}}
 | 
|
241  | 
||
242  | 
||
243  | 
\end{frame}
 | 
|
244  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
245  | 
||
246  | 
||
247  | 
||
248  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 559 | 249  | 
\begin{frame}[t]
 | 
250  | 
\frametitle{Why Study Compilers?}
 | 
|
| 0 | 251  | 
|
| 743 | 252  | 
|
253  | 
John Regehr {\small(Univ.~Utah, LLVM compiler hacker)}
 | 
|
254  | 
\here{https://blog.regehr.org/archives/1419}
 | 
|
255  | 
\smallskip\\  | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
256  | 
|
| 559 | 257  | 
\begin{bubble}[10.5cm]
 | 
258  | 
  \bf ``\ldots{}It’s effectively a perpetual
 | 
|
259  | 
employment act for solid compiler hackers.''  | 
|
260  | 
\end{bubble}
 | 
|
| 0 | 261  | 
|
| 559 | 262  | 
\onslide<1->{
 | 
263  | 
\only<2>{
 | 
|
264  | 
\begin{itemize}
 | 
|
265  | 
\item {\bf Hardware is getting weirder
 | 
|
| 631 | 266  | 
rather than getting clocked faster.}  | 
| 0 | 267  | 
|
| 559 | 268  | 
\begin{itemize}
 | 
| 631 | 269  | 
\item[] ``Almost all processors are multicores nowadays and it looks  | 
270  | 
like there is increasing asymmetry in resources across cores.  | 
|
271  | 
Processors come with vector units, crypto accelerators etc. We have  | 
|
272  | 
DSPs, GPUs, ARM big.little, and Xeon Phi. This is only scratching the  | 
|
273  | 
surface.''  | 
|
| 559 | 274  | 
\end{itemize}  
 | 
275  | 
\end{itemize}}
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
276  | 
\only<3>{
 | 
| 559 | 277  | 
\begin{itemize}
 | 
278  | 
\item {\bf We’re getting tired of low-level languages and
 | 
|
| 631 | 279  | 
their associated security disasters.}  | 
| 559 | 280  | 
|
281  | 
\begin{itemize}
 | 
|
| 631 | 282  | 
\item [] ``We want to write new code, to whatever extent possible, in  | 
283  | 
safer, higher-level languages. Compilers are caught right in the  | 
|
284  | 
middle of these opposing trends: one of their main jobs is to help  | 
|
285  | 
bridge the large and growing gap between increasingly high-level  | 
|
286  | 
languages and increasingly wacky platforms.''  | 
|
| 559 | 287  | 
\end{itemize}  
 | 
288  | 
\end{itemize}}}
 | 
|
| 1 | 289  | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
290  | 
\end{frame}
 | 
| 0 | 291  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 1 | 292  | 
|
| 631 | 293  | 
|
294  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
295  | 
\begin{frame}[c]
 | 
|
| 744 | 296  | 
\frametitle{Why Bother with Compilers?}
 | 
| 631 | 297  | 
|
| 745 | 298  | 
\textbf{Boeing 777s}: First flight in 1994. They want to achieve
 | 
299  | 
triple redundancy for potential hardware faults.  | 
|
300  | 
\here{http://www.citemaster.net/get/db3a81c6-548e-11e5-9d2e-00163e009cc7/R8.pdf}\bigskip
 | 
|
| 631 | 301  | 
|
302  | 
They compile 1 Ada program to\medskip  | 
|
303  | 
||
304  | 
\begin{itemize}
 | 
|
305  | 
\item Intel 80486  | 
|
306  | 
\item Motorola 68040 (old Macintoshes)  | 
|
307  | 
\item AMD 29050 (RISC chips used often in laser printers)  | 
|
308  | 
\end{itemize}\medskip\medskip
 | 
|
309  | 
||
310  | 
using 3 independent compilers.\bigskip\pause  | 
|
311  | 
||
312  | 
\small Airbus uses C and static analysers. Recently started using CompCert.  | 
|
| 745 | 313  | 
|
314  | 
\only<1->{%
 | 
|
315  | 
\begin{textblock}{6}(8,4.5)
 | 
|
316  | 
\includegraphics[scale=0.28]{../pics/777.png}
 | 
|
317  | 
\end{textblock}}
 | 
|
318  | 
||
| 631 | 319  | 
\end{frame}
 | 
| 756 | 320  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
321  | 
||
322  | 
||
323  | 
||
324  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
325  | 
\begin{frame}[c]
 | 
|
326  | 
\frametitle{What Do Compilers Do?}
 | 
|
327  | 
||
328  | 
Remember BF*** from PEP?  | 
|
329  | 
||
330  | 
\begin{center}
 | 
|
331  | 
\begin{tabular}{rcl}
 | 
|
332  | 
\bl{\texttt{>}} & $\Rightarrow$ & move one cell right\\
 | 
|
333  | 
\bl{\texttt{<}} & $\Rightarrow$ & move one cell left\\
 | 
|
334  | 
\bl{\texttt{+}} & $\Rightarrow$ & increase cell by one\\
 | 
|
335  | 
\bl{\texttt{-}} & $\Rightarrow$ & decrease cell by one\\
 | 
|
336  | 
\bl{\texttt{.}} & $\Rightarrow$ & print current cell\\
 | 
|
337  | 
\bl{\texttt{,}} & $\Rightarrow$ & input current cell\\
 | 
|
338  | 
\bl{\texttt{[}} & $\Rightarrow$ & loop begin\\
 | 
|
339  | 
\bl{\texttt{]}} & $\Rightarrow$ & loop end\medskip\\
 | 
|
340  | 
& $\Rightarrow$ & everything else is a comment\\  | 
|
341  | 
\end{tabular}  
 | 
|
342  | 
\end{center}  
 | 
|
343  | 
||
344  | 
\end{frame}
 | 
|
345  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
346  | 
||
347  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
348  | 
\begin{frame}[c]
 | 
|
349  | 
  \frametitle{A ``Compiler'' for BF*** to C}
 | 
|
350  | 
||
351  | 
  \begin{center}
 | 
|
352  | 
  \begin{tabular}{rcl}
 | 
|
353  | 
  \bl{\texttt{>}} & $\Rightarrow$ & \texttt{ptr++}\\
 | 
|
354  | 
  \bl{\texttt{<}} & $\Rightarrow$ & \texttt{ptr--}\\
 | 
|
355  | 
  \bl{\texttt{+}} & $\Rightarrow$ & \texttt{(*ptr)++}\\
 | 
|
356  | 
  \bl{\texttt{-}} & $\Rightarrow$ & \texttt{(*ptr)--}\\
 | 
|
357  | 
  \bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
 | 
|
358  | 
  \bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
 | 
|
359  | 
  \bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
 | 
|
360  | 
  \bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
 | 
|
361  | 
& $\Rightarrow$ & ignore everything else\\  | 
|
362  | 
  \end{tabular}  
 | 
|
363  | 
  \end{center}\bigskip  
 | 
|
364  | 
||
365  | 
  \texttt{char field[30000]\\ char *ptr = \&field[15000]}
 | 
|
366  | 
||
367  | 
\end{frame}
 | 
|
368  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
369  | 
||
370  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
371  | 
\begin{frame}[c]
 | 
|
| 757 | 372  | 
  \frametitle{Another~``Compiler''~for~BF~to~C}
 | 
| 756 | 373  | 
|
374  | 
  \begin{center}
 | 
|
375  | 
  \begin{tabular}{rcl}
 | 
|
376  | 
  \bl{\texttt{>\ldots>}} & $\Rightarrow$ & \texttt{ptr += n}\\
 | 
|
377  | 
  \bl{\texttt{<\ldots<}} & $\Rightarrow$ & \texttt{ptr -= n}\\
 | 
|
378  | 
  \bl{\texttt{+\ldots+}} & $\Rightarrow$ & \texttt{(*ptr) += n}\\
 | 
|
379  | 
  \bl{\texttt{-\ldots-}} & $\Rightarrow$ & \texttt{(*ptr) -= n}\\
 | 
|
380  | 
  \bl{\texttt{.}} & $\Rightarrow$ & \texttt{putchar(*ptr)}\\
 | 
|
381  | 
  \bl{\texttt{,}} & $\Rightarrow$ & \texttt{*ptr = getchar()}\\
 | 
|
382  | 
  \bl{\texttt{[}} & $\Rightarrow$ & \texttt{while(*ptr)\{}\\
 | 
|
383  | 
  \bl{\texttt{]}} & $\Rightarrow$ & \texttt{\}}\medskip\\
 | 
|
384  | 
& $\Rightarrow$ & ignore everything else\\  | 
|
385  | 
  \end{tabular}  
 | 
|
386  | 
  \end{center}\bigskip  
 | 
|
387  | 
||
388  | 
  \texttt{char field[30000]\\ char *ptr = \&field[15000]}
 | 
|
389  | 
||
390  | 
\end{frame}
 | 
|
391  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 757 | 392  | 
|
| 756 | 393  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
394  | 
\begin{frame}[t]
 | 
|
395  | 
\frametitle{A Brief Compiler History}
 | 
|
396  | 
||
397  | 
\bigskip  | 
|
398  | 
\begin{itemize}
 | 
|
399  | 
\item Turing Machines, 1936 (a tape as memory)  | 
|
400  | 
\item Regular Expressions, 1956\\  | 
|
401  | 
\item The first compiler for COBOL, 1957\\ (Grace Hopper)\medskip  | 
|
402  | 
\item But surprisingly research papers are still published nowadays\\  | 
|
403  | 
\item ``Parsing: The Solved Problem That Isn't''  | 
|
404  | 
  \here{https://tratt.net/laurie/blog/entries/parsing_the_solved_problem_that_isnt.html}
 | 
|
405  | 
\end{itemize}
 | 
|
406  | 
||
407  | 
||
408  | 
\begin{textblock}{8.5}(5,7.6)
 | 
|
409  | 
\begin{flushright}
 | 
|
410  | 
\includegraphics[scale=0.3]{pics/hopper.jpg}\\
 | 
|
411  | 
\footnotesize\textcolor{gray}{Grace Hopper}\smallskip\\
 | 
|
412  | 
||
413  | 
{\small\textcolor{gray}{(she made it to David Letterman's Late Night show
 | 
|
414  | 
 \here{https://youtu.be/3N_ywhx6_K0?t=31})}}
 | 
|
415  | 
\end{flushright}
 | 
|
416  | 
\end{textblock}
 | 
|
417  | 
||
418  | 
\end{frame}
 | 
|
419  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
420  | 
||
421  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
422  | 
\begin{frame}[c]
 | 
|
| 757 | 423  | 
\frametitle{Some Housekeeping}
 | 
424  | 
||
425  | 
\textbf{Exams will be online:}\bigskip
 | 
|
426  | 
||
427  | 
\begin{itemize}
 | 
|
428  | 
\item final exam in January (30\%)  | 
|
429  | 
\item mid-term shortly after Reading Week (10\%)\bigskip  | 
|
430  | 
||
431  | 
\item weekly engagement (10\%)  | 
|
432  | 
\end{itemize}\bigskip\bigskip\pause
 | 
|
433  | 
||
434  | 
||
435  | 
\textbf{Weekly Homework (optional):}
 | 
|
436  | 
\begin{itemize}
 | 
|
437  | 
\item uploaded on KEATS, send answers via email, responded individually  | 
|
438  | 
\item \alert{\bf all} questions in the exam and mid-term will be from the HW!!
 | 
|
439  | 
\end{itemize}  
 | 
|
440  | 
||
441  | 
\end{frame}
 | 
|
442  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
443  | 
||
444  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
445  | 
\begin{frame}[c]
 | 
|
446  | 
\frametitle{Some Housekeeping}
 | 
|
447  | 
||
448  | 
\textbf{Coursework (5 accounting for 45\%):}\bigskip
 | 
|
449  | 
||
450  | 
\begin{itemize}
 | 
|
451  | 
\item matcher (5\%)  | 
|
452  | 
\item lexer (8\%)  | 
|
453  | 
\item parser / interpreter (10\%)  | 
|
454  | 
\item JVM compiler (10\%)  | 
|
455  | 
\item LLVM compiler (12\%)  | 
|
456  | 
\end{itemize}\bigskip\pause
 | 
|
457  | 
||
458  | 
you can use any programming language you like (Haskell, Rust)\\\pause  | 
|
459  | 
you can use any code I showed you and uploaded to KEATS\ldots\textbf{BUT NOTHING ELSE!}\pause
 | 
|
460  | 
||
461  | 
\end{frame}
 | 
|
462  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
463  | 
||
464  | 
||
465  | 
||
466  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
467  | 
\begin{frame}[c]
 | 
|
468  | 
\frametitle{Lectures 1--5}
 | 
|
469  | 
||
470  | 
transforming strings into structured data\\[10mm]  | 
|
471  | 
||
472  | 
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
 | 
|
473  | 
\hspace{5mm}(recognising ``words'')\\[6mm]
 | 
|
474  | 
||
475  | 
{\LARGE\bf Parsing}\medskip\\
 | 
|
476  | 
\hspace{5mm}(recognising ``sentences'')
 | 
|
477  | 
||
478  | 
\begin{textblock}{1}(10,9.1)
 | 
|
479  | 
\begin{tabular}{c}
 | 
|
480  | 
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
 | 
|
481  | 
\footnotesize Stone of Rosetta  | 
|
482  | 
\end{tabular}
 | 
|
483  | 
\end{textblock}
 | 
|
484  | 
||
485  | 
\end{frame}
 | 
|
486  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
487  | 
||
488  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
489  | 
\begin{frame}[c]
 | 
|
| 756 | 490  | 
\frametitle{Lectures 1--5}
 | 
491  | 
||
492  | 
transforming strings into structured data\\[10mm]  | 
|
493  | 
||
494  | 
{\LARGE\bf Lexing} {\hfill{}based on regular expressions}\medskip\\
 | 
|
495  | 
\hspace{5mm}(recognising ``words'')\\[6mm]
 | 
|
496  | 
||
497  | 
{\LARGE\bf Parsing}\medskip\\
 | 
|
498  | 
\hspace{5mm}(recognising ``sentences'')
 | 
|
499  | 
||
500  | 
\begin{textblock}{1}(10,9.1)
 | 
|
501  | 
\begin{tabular}{c}
 | 
|
502  | 
\includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm]
 | 
|
503  | 
\footnotesize Stone of Rosetta  | 
|
504  | 
\end{tabular}
 | 
|
505  | 
\end{textblock}
 | 
|
506  | 
||
507  | 
\end{frame}
 | 
|
508  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
509  | 
||
510  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
511  | 
\begin{frame}[c]
 | 
|
512  | 
  \frametitle{Lectures 5--10}
 | 
|
513  | 
||
514  | 
code generation for a small imperative and a small functional language\\[10mm]  | 
|
515  | 
||
516  | 
  {\LARGE\bf Interpreters}\medskip\\
 | 
|
517  | 
  \hspace{5mm}(directly runs a program)\\[6mm]
 | 
|
518  | 
||
519  | 
  {\LARGE\bf Compilers}\medskip\\
 | 
|
520  | 
  \hspace{5mm}(generate JVM code and LLVM-IR code)
 | 
|
521  | 
||
522  | 
  \begin{textblock}{1}(8.8,8.1)
 | 
|
523  | 
  \begin{tabular}{c@{}c}
 | 
|
524  | 
    \includegraphics[scale=0.4]{../pics/javaduke.png} &
 | 
|
525  | 
    \includegraphics[scale=0.23]{../pics/llvmlogo.png}
 | 
|
526  | 
  \end{tabular}
 | 
|
527  | 
  \end{textblock}
 | 
|
528  | 
||
529  | 
  \end{frame}
 | 
|
| 757 | 530  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 756 | 531  | 
|
532  | 
||
533  | 
||
534  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
535  | 
\begin{frame}[t]
 | 
|
| 757 | 536  | 
\frametitle{Familiar Regular Expressions}
 | 
| 756 | 537  | 
\small  | 
538  | 
\begin{center}
 | 
|
539  | 
\texttt{[a-z0-9\_$\backslash{}$.-]+ @ [a-z0-9$\backslash{}$.-]+ . [a-z$\backslash{}$.]\{2,6\}}
 | 
|
540  | 
\end{center}\smallskip
 | 
|
541  | 
||
542  | 
\begin{center}
 | 
|
543  | 
\begin{tabular}{@{}lp{8.5cm}@{}}
 | 
|
544  | 
\pcode{re*} & matches 0 or more times\\
 | 
|
545  | 
\pcode{re+} & matches 1 or more times\\
 | 
|
546  | 
\pcode{re?} & matches 0 or 1 times\\
 | 
|
547  | 
\pcode{re\{n\}}	& matches exactly \pcode{n} number of times\\
 | 
|
548  | 
\pcode{re\{n,m\}} & matches at least \pcode{n} and at most \pcode{m} times\\
 | 
|
549  | 
\pcode{[...]} & matches any single character inside the brackets\\
 | 
|
550  | 
\pcode{[^...]} & matches any single character not inside the 
 | 
|
551  | 
brackets\\  | 
|
552  | 
\pcode{a-z A-Z} & character ranges\\
 | 
|
553  | 
\pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\
 | 
|
554  | 
\pcode{.} & matches every character except newline\\
 | 
|
555  | 
\pcode{(re)}	& groups regular expressions and remembers 
 | 
|
556  | 
the matched text  | 
|
557  | 
\end{tabular}
 | 
|
558  | 
\end{center}
 | 
|
559  | 
||
| 757 | 560  | 
\end{frame}
 | 
561  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
562  | 
||
563  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
564  | 
\begin{frame}[c]
 | 
|
565  | 
\frametitle{Some ``innocent'' examples}
 | 
|
566  | 
||
567  | 
Let's try two examples  | 
|
568  | 
||
569  | 
\begin{center}
 | 
|
570  | 
  \bl{\texttt{(a*)*b}}
 | 
|
571  | 
  \hspace{2cm}
 | 
|
572  | 
  \bl{\texttt{[a?]\{n\}[a]\{n\}}}
 | 
|
573  | 
\end{center}\bigskip\pause  
 | 
|
574  | 
||
575  | 
and match them with strings of the form  | 
|
576  | 
||
577  | 
\begin{center}
 | 
|
578  | 
  \bl{\texttt{a}},
 | 
|
579  | 
  \bl{\texttt{aa}},
 | 
|
580  | 
  \bl{\texttt{aaa}},
 | 
|
581  | 
  \bl{\texttt{aaaa}},
 | 
|
582  | 
  \bl{\texttt{aaaaa}},
 | 
|
583  | 
  \bl{$\underbrace{\texttt{a}\ldots\texttt{a}}_n$}  
 | 
|
584  | 
\end{center}  
 | 
|
| 756 | 585  | 
|
586  | 
\end{frame}
 | 
|
587  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
588  | 
||
| 631 | 589  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
590  | 
\begin{frame}[c]
 | 
| 745 | 591  | 
\frametitle{Why Bother with Regexes?}
 | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
592  | 
|
| 745 | 593  | 
\begin{columns}[t,onlytextwidth]
 | 
594  | 
\begin{column}{1.8cm}
 | 
|
595  | 
\mbox{}   
 | 
|
596  | 
\end{column}    
 | 
|
597  | 
\begin{column}{.5\textwidth}
 | 
|
598  | 
\small{}Ruby, Python, Java 8\medskip\\
 | 
|
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
599  | 
\begin{tikzpicture}\footnotesize
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
600  | 
\begin{axis}[
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
601  | 
    xlabel={$n$},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
602  | 
    x label style={at={(1.05,0.0)}},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
603  | 
    ylabel={time in secs},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
604  | 
enlargelimits=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
605  | 
    xtick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
606  | 
xmax=33,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
607  | 
ymax=35,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
608  | 
    ytick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
609  | 
scaled ticks=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
610  | 
axis lines=left,  | 
| 745 | 611  | 
width=\textwidth,  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
612  | 
height=4cm,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
613  | 
    legend entries={Python,Ruby},  
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
614  | 
legend pos=north west,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
615  | 
legend cell align=left]  | 
| 559 | 616  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
 | 
617  | 
\addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
 | 
|
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
618  | 
\end{axis}
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
619  | 
\end{tikzpicture}
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
620  | 
\begin{tikzpicture}\footnotesize
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
621  | 
\begin{axis}[
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
622  | 
    xlabel={$n$},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
623  | 
    x label style={at={(1.05,0.0)}},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
624  | 
    ylabel={time in secs},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
625  | 
enlargelimits=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
626  | 
    xtick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
627  | 
xmax=33,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
628  | 
ymax=35,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
629  | 
    ytick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
630  | 
scaled ticks=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
631  | 
axis lines=left,  | 
| 745 | 632  | 
width=\textwidth,  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
633  | 
height=4cm,  | 
| 767 | 634  | 
    legend entries={Python, Java 8, JavaScript, Swift},  
 | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
635  | 
legend pos=north west,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
636  | 
legend cell align=left]  | 
| 559 | 637  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};   
 | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
638  | 
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
 | 
| 631 | 639  | 
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
 | 
| 767 | 640  | 
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
 | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
641  | 
\end{axis}
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
642  | 
\end{tikzpicture}
 | 
| 745 | 643  | 
%  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
644  | 
\end{column}
 | 
| 745 | 645  | 
\begin{column}{.5\textwidth}
 | 
646  | 
\small{}Us (after next lecture)\medskip\\
 | 
|
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
647  | 
\begin{tikzpicture}\footnotesize
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
648  | 
\begin{axis}[
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
649  | 
    xlabel={$n$},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
650  | 
    x label style={at={(1.07,0.0)}},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
651  | 
    ylabel={time in secs},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
652  | 
enlargelimits=false,  | 
| 
442
 
84d6714840c9
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
437 
diff
changeset
 | 
653  | 
    xtick={0,5000,...,10000},
 | 
| 
 
84d6714840c9
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
437 
diff
changeset
 | 
654  | 
xmax=11000,  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
655  | 
ymax=35,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
656  | 
    ytick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
657  | 
scaled ticks=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
658  | 
axis lines=left,  | 
| 745 | 659  | 
width=\textwidth,  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
660  | 
height=4cm]  | 
| 
437
 
fe387fcbf2ee
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
428 
diff
changeset
 | 
661  | 
\addplot[green,mark=square*,mark options={fill=white}] table {re2.data};
 | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
662  | 
\addplot[black,mark=square*,mark options={fill=white}] table {re3.data};
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
663  | 
\end{axis}
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
664  | 
\end{tikzpicture}
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
665  | 
\begin{tikzpicture}\footnotesize
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
666  | 
\begin{axis}[
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
667  | 
    xlabel={$n$},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
668  | 
    x label style={at={(1.07,0.0)}},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
669  | 
    ylabel={time in secs},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
670  | 
enlargelimits=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
671  | 
ymax=35,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
672  | 
    ytick={0,5,...,30},
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
673  | 
scaled ticks=false,  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
674  | 
axis lines=left,  | 
| 745 | 675  | 
width=\textwidth,  | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
676  | 
height=4cm]  | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
677  | 
\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
 | 
| 
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
678  | 
\end{axis}
 | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
679  | 
\end{tikzpicture}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
680  | 
\end{column}
 | 
| 745 | 681  | 
\end{columns}
 | 
682  | 
\medskip  | 
|
683  | 
||
684  | 
\begin{textblock}{3}(-0.1,3.3)
 | 
|
685  | 
\small\hfill\bl{\texttt{[a?]\{n\}[a]\{n\}}}:
 | 
|
686  | 
\end{textblock}
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
687  | 
|
| 745 | 688  | 
\begin{textblock}{3}(-0.1,8.7)  
 | 
689  | 
\small\hfill\bl{\texttt{(a*)*b}}:
 | 
|
690  | 
\end{textblock}
 | 
|
691  | 
||
692  | 
\begin{textblock}{3}(0.3,13)
 | 
|
693  | 
\small{}matching with strings
 | 
|
694  | 
\bl{$\underbrace{\texttt{a}\ldots\texttt{a}}_n$}
 | 
|
695  | 
\end{textblock}
 | 
|
696  | 
||
| 510 | 697  | 
\end{frame} 
 | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
698  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 699  | 
|
700  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
701  | 
\begin{frame}[c,fragile]
 | 
|
702  | 
  \frametitle{Incidents}
 | 
|
703  | 
||
704  | 
  \begin{itemize}
 | 
|
705  | 
  \item a global outage on 2 July 2019 at \textbf{Cloudflare} 
 | 
|
706  | 
(first one for six years)\medskip  | 
|
707  | 
||
708  | 
  \begin{center}\small\color{blue}
 | 
|
709  | 
  \begin{verbatim}  
 | 
|
710  | 
(?:(?:\"|'|\]|\}|\\|\d|(?:nan|infinity|true|false|  | 
|
711  | 
null|undefined|symbol|math)|\`|\-|\+)+[)]*;?((?:\s  | 
|
712  | 
  |-|~|!|{}|\|\||\+)*.*(?:.*=.*)))  
 | 
|
713  | 
  \end{verbatim}
 | 
|
714  | 
  \end{center}\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip\bigskip    
 | 
|
715  | 
||
716  | 
  \item on 20 July 2016 the \textbf{Stack Exchange} webpage went down
 | 
|
| 745 | 717  | 
because of an evil regular expression  | 
718  | 
    \here{https://stackstatus.net/post/147710624694/outage-postmortem-july-20-2016}    
 | 
|
| 631 | 719  | 
  \end{itemize}
 | 
720  | 
||
| 745 | 721  | 
  \begin{textblock}{6}(6,7.6)
 | 
| 728 | 722  | 
    \includegraphics[scale=0.14]{../pics/cloudflare.png}\\
 | 
| 631 | 723  | 
\footnotesize  | 
| 745 | 724  | 
It serves more web traffic than Twitter, Amazon, Apple,  | 
725  | 
Instagram, Bing \& Wikipedia combined.  | 
|
726  | 
    \here{https://blog.cloudflare.com/details-of-the-cloudflare-outage-on-july-2-2019/}
 | 
|
| 631 | 727  | 
    \end{textblock}
 | 
728  | 
||
729  | 
  \end{frame}
 | 
|
730  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
731  | 
||
| 560 | 732  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
733  | 
\begin{frame}[c]
 | 
|
734  | 
\frametitle{Evil Regular Expressions}
 | 
|
735  | 
||
736  | 
\begin{itemize}
 | 
|
737  | 
\item \alert{R}egular \alert{e}xpression \alert{D}enial \alert{o}f \alert{S}ervice (ReDoS)\medskip
 | 
|
| 745 | 738  | 
\item Some evil regular expressions:\medskip  | 
| 560 | 739  | 
\begin{itemize}
 | 
| 745 | 740  | 
\item \bl{\texttt{[a?]\{n\}\;[a]\{n\}}}
 | 
741  | 
\item \bl{\texttt{(a*)*\;b}}  
 | 
|
742  | 
\item \bl{\texttt{([a-z]+)*}} 
 | 
|
743  | 
\item \bl{\texttt{(a + aa)*}}
 | 
|
744  | 
\item \bl{\texttt{(a + a?)*}}
 | 
|
| 560 | 745  | 
\end{itemize}
 | 
746  | 
||
747  | 
\item sometimes also called \alert{catastrophic backtracking}
 | 
|
748  | 
\item this is a problem for \alert{N}etwork \alert{I}ntrusion
 | 
|
| 631 | 749  | 
  \alert{D}etection systems, Cloudflare, StackExchange, Atom editor
 | 
| 560 | 750  | 
\item \url{https://vimeo.com/112065252}  
 | 
751  | 
\end{itemize}
 | 
|
752  | 
||
753  | 
\end{frame}
 | 
|
754  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
755  | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
756  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
757  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 758  | 
%\begin{frame}[c]
 | 
759  | 
%\frametitle{Today}
 | 
|
760  | 
%  | 
|
761  | 
%\begin{itemize}
 | 
|
762  | 
%\item While the ultimate goal is to implement a small compiler for the JVM  | 
|
763  | 
% \ldots\bigskip  | 
|
764  | 
%\end{itemize}
 | 
|
765  | 
%  | 
|
766  | 
%Let's start with:  | 
|
767  | 
%  | 
|
768  | 
%\begin{itemize}
 | 
|
769  | 
%\item a web-crawler  | 
|
770  | 
%\item an email harvester  | 
|
| 559 | 771  | 
%\item \textcolor{gray}{(a web-scraper)}
 | 
| 631 | 772  | 
%\end{itemize}
 | 
773  | 
%  | 
|
774  | 
%\end{frame}
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
775  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 0 | 776  | 
|
| 2 | 777  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 778  | 
%\begin{frame}[t]
 | 
779  | 
%\frametitle{A Web-Crawler}
 | 
|
780  | 
%  | 
|
781  | 
%\mbox{}\\[10mm]
 | 
|
782  | 
%  | 
|
783  | 
%\begin{enumerate}
 | 
|
784  | 
%\item given an URL, read the corresponding webpage  | 
|
785  | 
%\item extract all links from it  | 
|
786  | 
%\item call the web-crawler again for all these links  | 
|
787  | 
%\end{enumerate}
 | 
|
788  | 
%  | 
|
789  | 
%\end{frame}
 | 
|
| 2 | 790  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
791  | 
||
792  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 631 | 793  | 
%\begin{frame}[t]
 | 
794  | 
%\frametitle{A Web-Crawler}
 | 
|
795  | 
%  | 
|
796  | 
%\mbox{}\\[10mm]
 | 
|
797  | 
%  | 
|
798  | 
%  | 
|
799  | 
%\begin{enumerate}
 | 
|
800  | 
%\item given an URL, read the corresponding webpage  | 
|
801  | 
%\item if not possible, print out a problem
|
802  | 
%\item if possible, extract all links from it  | 
|
803  | 
%\item call the web-crawler again for all these links  | 
|
804  | 
%\end{enumerate}\bigskip\pause
 | 
|
805  | 
%  | 
|
806  | 
%\small (we need a bound for the number of recursive calls)  | 
|
807  | 
%  | 
|
808  | 
%\small (the purpose is to check all links on my own webpage)  | 
|
809  | 
%\end{frame}
 | 
|
| 2 | 810  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
811  | 
||
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
812  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
813  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 814  | 
%\begin{frame}[c]
 | 
815  | 
%  | 
|
816  | 
%\begin{textblock}{1}(2,5)
 | 
|
817  | 
%\begin{tabular}{c}
 | 
|
818  | 
%\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm]
 | 
|
819  | 
%\small Server  | 
|
820  | 
%\end{tabular}
 | 
|
821  | 
%\end{textblock}
 | 
|
822  | 
%  | 
|
823  | 
%\begin{textblock}{1}(5.6,4)
 | 
|
824  | 
%  \begin{tikzpicture}[scale=1.1]
 | 
|
825  | 
%  \draw[white] (0,1) node (X) {};
 | 
|
826  | 
%  \draw[white] (2,1) node (Y) {};
 | 
|
827  | 
%   \draw[white] (0,0) node (X1) {};
 | 
|
828  | 
%  \draw[white] (2,0) node (Y1) {};
 | 
|
829  | 
%   \draw[white] (0,-1) node (X2) {};
 | 
|
830  | 
%  \draw[white] (2,-1) node (Y2) {};
 | 
|
831  | 
% \draw[red, <-, line width = 2mm] (X) -- (Y);  | 
|
832  | 
%  \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {};
 | 
|
833  | 
% \draw[red, ->, line width = 2mm] (X1) -- (Y1);  | 
|
834  | 
%  \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {};
 | 
|
835  | 
% \draw[red, <-, line width = 2mm] (X2) -- (Y2);  | 
|
836  | 
%  \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {};
 | 
|
837  | 
%  \end{tikzpicture}
 | 
|
838  | 
%\end{textblock}
 | 
|
839  | 
%  | 
|
840  | 
%  | 
|
841  | 
%\begin{textblock}{1}(9,5.5)
 | 
|
842  | 
%\begin{tabular}{c}
 | 
|
843  | 
%\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm]
 | 
|
844  | 
%\small Browser  | 
|
845  | 
%\end{tabular}
 | 
|
846  | 
%\end{textblock}
 | 
|
847  | 
%\end{frame}
 | 
|
848  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
849  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
850  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
851  | 
|
| 0 | 852  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 853  | 
%\begin{frame}[c]
 | 
854  | 
%\frametitle{Scala}
 | 
|
855  | 
%  | 
|
856  | 
%\small A simple Scala function for reading webpages:  | 
|
857  | 
%\bigskip  | 
|
858  | 
%  | 
|
859  | 
%\footnotesize  | 
|
860  | 
%\lstinputlisting{../progs/app0.scala}
 | 
|
861  | 
%\medskip\pause  | 
|
862  | 
%  | 
|
863  | 
%\lstinline{get_page("""https://nms.kcl.ac.uk/christian.urban/""")}
 | 
|
864  | 
%\bigskip\medskip\pause  | 
|
865  | 
%  | 
|
866  | 
%  | 
|
867  | 
%\small A slightly more complicated version for handling errors:  | 
|
868  | 
%\smallskip  | 
|
869  | 
%  | 
|
870  | 
%\footnotesize  | 
|
871  | 
%\lstinputlisting[xleftmargin=-4mm]{../progs/app1.scala}
 | 
|
872  | 
%  | 
|
873  | 
%  | 
|
874  | 
%\end{frame}
 | 
|
| 0 | 875  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
876  | 
||
| 
98
 
1f3d89fe9820
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
97 
diff
changeset
 | 
877  | 
|
| 
 
1f3d89fe9820
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
97 
diff
changeset
 | 
878  | 
|
| 
 
1f3d89fe9820
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
97 
diff
changeset
 | 
879  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 757 | 880  | 
%\begin{frame}[t]
 | 
881  | 
%\frametitle{A Regular Expression}
 | 
|
882  | 
%  | 
|
883  | 
%\begin{itemize}
 | 
|
884  | 
%\item \ldots{} is a pattern or template for specifying strings
 | 
|
885  | 
%\end{itemize}\bigskip
 | 
|
886  | 
%  | 
|
887  | 
%\begin{center}  
 | 
|
888  | 
%\only<1>{\scode{"https?://[^"]*"}}%
 | 
|
889  | 
%\only<2>{\scode{""""https?://[^"]*"""".r}}
 | 
|
890  | 
%\end{center}\bigskip\bigskip
 | 
|
891  | 
%  | 
|
892  | 
%matches for example\smallskip\\  | 
|
893  | 
%\hspace{2mm}\code{"http://www.foobar.com"}\\
 | 
|
894  | 
%\hspace{2mm}\code{"https://www.tls.org"}\smallskip\\
 | 
|
895  | 
%  | 
|
896  | 
%but not\smallskip\\  | 
|
897  | 
%\hspace{2mm}\code{"http://www."foo"bar.com"}\\
 | 
|
898  | 
%  | 
|
899  | 
%\end{frame}
 | 
|
| 3 | 900  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
901  | 
||
902  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 632 | 903  | 
%\begin{frame}[c]
 | 
904  | 
%\frametitle{Finding Operations in Scala}
 | 
|
905  | 
%  | 
|
906  | 
%{\bf\code{rexp.findAllIn(string)}}\medskip
 | 
|
907  | 
%  | 
|
908  | 
%returns a list of all (sub)strings that match the  | 
|
909  | 
%regular expression  | 
|
910  | 
%\bigskip\bigskip  | 
|
911  | 
%  | 
|
912  | 
%  | 
|
913  | 
%{\bf\code{rexp.findFirstIn(string)}}\medskip
 | 
|
914  | 
%  | 
|
915  | 
%returns either  | 
|
916  | 
%  | 
|
917  | 
%\begin{itemize}
 | 
|
918  | 
%\item \code{None} if no (sub)string matches or 
 | 
|
919  | 
%\item \code{Some(s)} with the first (sub)string
 | 
|
920  | 
%\end{itemize}
 | 
|
921  | 
%  | 
|
922  | 
%\end{frame}
 | 
|
| 0 | 923  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
924  | 
||
925  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 631 | 926  | 
%\begin{frame}[c]
 | 
927  | 
%  | 
|
928  | 
%\footnotesize  | 
|
929  | 
%\lstinputlisting{../progs/app2.scala}
 | 
|
930  | 
%  | 
|
931  | 
%\end{frame}
 | 
|
| 
254
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
932  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 0 | 933  | 
|
| 
254
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
934  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 935  | 
%\begin{frame}[c]
 | 
936  | 
%  | 
|
937  | 
%\small  | 
|
938  | 
%A version that only crawls links in ``my'' domain:\bigskip  | 
|
939  | 
%  | 
|
940  | 
%\footnotesize  | 
|
941  | 
%\lstinputlisting{../progs/app3.scala}
 | 
|
942  | 
%  | 
|
943  | 
%\end{frame}
 | 
|
| 0 | 944  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
945  | 
||
946  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 631 | 947  | 
%\begin{frame}[c]
 | 
948  | 
%\lstset{xleftmargin=-4mm}
 | 
|
949  | 
%\small  | 
|
950  | 
%A little email harvester:  | 
|
951  | 
%  | 
|
952  | 
%\footnotesize  | 
|
953  | 
%\lstinputlisting{../progs/app4.scala}\bigskip
 | 
|
954  | 
%  | 
|
955  | 
%\tiny  | 
|
956  | 
%\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}
 | 
|
957  | 
%  | 
|
958  | 
%\end{frame}
 | 
|
| 3 | 959  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
960  | 
||
961  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 14 | 962  | 
\begin{frame}[t]
 | 
| 757 | 963  | 
\frametitle{(Basic) Regular Expressions}
 | 
| 0 | 964  | 
|
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
965  | 
Their inductive definition:  | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
966  | 
|
| 14 | 967  | 
|
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
968  | 
\begin{textblock}{6}(2,7.5)
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
969  | 
  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
 | 
| 512 | 970  | 
  \bl{$r$} & \bl{$::=$}  & \bl{$\ZERO$}  & nothing\\
 | 
| 
428
 
a47c4227a0c6
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
334 
diff
changeset
 | 
971  | 
         & \bl{$\mid$} & \bl{$\ONE$}       & empty string / \pcode{""} / $[]$\\
 | 
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
972  | 
         & \bl{$\mid$} & \bl{$c$}                         & character\\
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
973  | 
         & \bl{$\mid$} & \bl{$r_1 + r_2$}  & alternative / choice\\
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
974  | 
         & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
975  | 
         & \bl{$\mid$} & \bl{$r^*$}            & star (zero or more)\\
 | 
| 3 | 976  | 
  \end{tabular}
 | 
977  | 
  \end{textblock}
 | 
|
978  | 
||
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
979  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
980  | 
\only<2->{\footnotesize
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
981  | 
\begin{textblock}{9}(2,0.5)
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
982  | 
\begin{bubble}[9.8cm]
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
983  | 
\lstinputlisting{../progs/app01.scala}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
984  | 
\end{bubble}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
985  | 
\end{textblock}}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
986  | 
|
| 
254
 
dcd4688690ce
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
253 
diff
changeset
 | 
987  | 
\end{frame}
 | 
| 3 | 988  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
989  | 
||
990  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 506 | 991  | 
%\begin{frame}[t]
 | 
992  | 
%\frametitle{Regular Expressions}
 | 
|
993  | 
%  | 
|
994  | 
%\small  | 
|
995  | 
%In Scala:\bigskip  | 
|
996  | 
%  | 
|
997  | 
%\footnotesize  | 
|
998  | 
%\lstinputlisting{../progs/app51.scala}
 | 
|
999  | 
%  | 
|
1000  | 
%  | 
|
1001  | 
%\end{frame}
 | 
|
| 0 | 1002  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1003  | 
||
1004  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1005  | 
\begin{frame}[t]
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1006  | 
\frametitle{Strings}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1007  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1008  | 
\ldots{} are lists of characters. For example \code{"hello"}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1009  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1010  | 
\begin{center}
 | 
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1011  | 
\bl{$[h, e, l, l, o]$} or just \bl{$hello$}
 | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1012  | 
\end{center}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1013  | 
|
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1014  | 
the empty string: \bl{$[]$} or \bl{\pcode{""}}\bigskip\\
 | 
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1015  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1016  | 
the concatenation of two strings:  | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1017  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1018  | 
\begin{center}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1019  | 
\bl{$s_1 \,@\, s_2$}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1020  | 
\end{center}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1021  | 
|
| 631 | 1022  | 
\bl{\textit{foo $@$ bar = foobar}}\\
 | 
1023  | 
\bl{\textit{baz $@\, []$ = baz}}
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1024  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1025  | 
\end{frame}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1026  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1027  | 
|
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1028  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1029  | 
\begin{frame}[c]
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1030  | 
\frametitle{Languages, Strings}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1031  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1032  | 
\begin{itemize}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1033  | 
\item \alert{\bf Strings} are lists of characters, for example
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1034  | 
\begin{center}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1035  | 
\bl{$[]$},\;\bl{$abc$}  \hspace{2cm}(Pattern match: \bl{$c\!::\!s$})
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1036  | 
\end{center}\bigskip
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1037  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1038  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1039  | 
\item A \alert{\bf language} is a set of strings, for example\medskip
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1040  | 
\begin{center}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1041  | 
\bl{$\{[], hello, \textit{foobar}, a, abc\}$}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1042  | 
\end{center}\bigskip
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1043  | 
|
| 
332
 
4755ad4b457b
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
331 
diff
changeset
 | 
1044  | 
\item \alert{\bf Concatenation} of strings and languages
 | 
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1045  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1046  | 
\begin{center}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1047  | 
\begin{tabular}{rcl}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1048  | 
\bl{$\textit{foo}\;@\;bar$} & \bl{$=$} & \bl{$\textit{foobar}$}\medskip\\
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1049  | 
\bl{$A\;@\;B$} & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1050  | 
\end{tabular}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1051  | 
\end{center}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1052  | 
|
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1053  | 
%\item The \alert{\bf meaning} of a regular expression is a set of 
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1054  | 
% strings, or language.  | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1055  | 
\end{itemize}  
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1056  | 
|
| 762 | 1057  | 
\only<2>{
 | 
1058  | 
\begin{textblock}{4}(10.5,8)
 | 
|
1059  | 
\small  | 
|
1060  | 
Let  | 
|
1061  | 
||
1062  | 
\bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}
 | 
|
1063  | 
\[  | 
|
1064  | 
\bl{A \,@\, B = \{fooa, foob, bara, barb\}}
 | 
|
1065  | 
\]  | 
|
1066  | 
\end{textblock}}  
 | 
|
1067  | 
||
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1068  | 
\end{frame}
 | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1069  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1070  | 
|
| 761 | 1071  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1072  | 
\begin{frame}[c]
 | 
|
| 762 | 1073  | 
  \frametitle{Two Corner Cases}
 | 
1074  | 
||
1075  | 
\Large  | 
|
1076  | 
  \begin{center}
 | 
|
1077  | 
  \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
 | 
|
1078  | 
  \bl{$A \,@\, \{\} = \;?$}
 | 
|
1079  | 
  \end{center}  
 | 
|
1080  | 
||
1081  | 
  \end{frame}
 | 
|
1082  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1083  | 
||
1084  | 
||
1085  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1086  | 
\begin{frame}[c]
 | 
|
| 761 | 1087  | 
\frametitle{The Meaning of a Regex}
 | 
1088  | 
||
1089  | 
...all the strings a regular expression can match.  | 
|
1090  | 
||
1091  | 
\begin{center}
 | 
|
1092  | 
 \begin{tabular}{rcl}
 | 
|
1093  | 
 \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
 | 
|
1094  | 
 \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1095  | 
 \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
 | 
|
1096  | 
 \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
 | 
|
1097  | 
 \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
 | 
|
1098  | 
 \bl{$L(r^*)$}           & \bl{$\dn$} & \\
 | 
|
1099  | 
  \end{tabular}
 | 
|
1100  | 
\end{center}
 | 
|
1101  | 
||
1102  | 
\begin{textblock}{14}(1.5,13.5)\small
 | 
|
1103  | 
\bl{$L$} is a function from regular expressions to 
 | 
|
1104  | 
sets of strings (languages):\smallskip\\  | 
|
1105  | 
\bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
 | 
|
1106  | 
\end{textblock}
 | 
|
1107  | 
||
1108  | 
\end{frame}
 | 
|
1109  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1110  | 
||
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1111  | 
|
| 0 | 1112  | 
|
| 5 | 1113  | 
|
| 631 | 1114  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1115  | 
\begin{frame}[c]
 | 
|
1116  | 
  \frametitle{The Power Operation}
 | 
|
1117  | 
||
1118  | 
  \begin{itemize}
 | 
|
1119  | 
  \item The \alert{\textbf{\boldmath$n$th Power}} of a language:
 | 
|
1120  | 
||
1121  | 
  \begin{center}
 | 
|
1122  | 
  \begin{tabular}{lcl}
 | 
|
1123  | 
  \bl{$A^0$}    & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1124  | 
  \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
 | 
|
1125  | 
  \end{tabular}
 | 
|
1126  | 
  \end{center}\bigskip
 | 
|
1127  | 
||
1128  | 
\item[] For example  | 
|
1129  | 
||
1130  | 
  \begin{center}
 | 
|
1131  | 
  \begin{tabular}{lcl@{\hspace{10mm}}l}
 | 
|
1132  | 
  \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
 | 
|
1133  | 
  \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
 | 
|
1134  | 
  \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
 | 
|
1135  | 
  \end{tabular}
 | 
|
1136  | 
  \end{center}
 | 
|
1137  | 
||
1138  | 
  \end{itemize}  
 | 
|
1139  | 
||
1140  | 
  \end{frame}
 | 
|
| 762 | 1141  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 631 | 1142  | 
|
| 743 | 1143  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1144  | 
\begin{frame}[c]
 | 
|
| 745 | 1145  | 
\frametitle{The Meaning of a Regex}
 | 
| 743 | 1146  | 
|
| 762 | 1147  | 
\begin{textblock}{15}(1,4)
 | 
| 743 | 1148  | 
 \begin{tabular}{rcl}
 | 
1149  | 
 \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
 | 
|
1150  | 
 \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1151  | 
 \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
 | 
|
1152  | 
 \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
 | 
|
| 762 | 1153  | 
 \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
 | 
1154  | 
 \bl{$L(r^*)$}           & \bl{$\dn$} & \onslide<2->{\bl{$\bigcup_{0 \le n} L(r)^n$}}\\
 | 
|
1155  | 
  \end{tabular}\bigskip
 | 
|
1156  | 
||
1157  | 
%\onslide<2->{
 | 
|
1158  | 
%\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
 | 
|
1159  | 
%\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
 | 
|
1160  | 
%\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
 | 
|
1161  | 
%}  | 
|
| 743 | 1162  | 
\end{textblock}
 | 
1163  | 
||
1164  | 
\end{frame}
 | 
|
| 762 | 1165  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 743 | 1166  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1167  | 
\begin{frame}[c]
 | 
|
| 631 | 1168  | 
  \frametitle{The Star Operation}
 | 
1169  | 
||
1170  | 
  \begin{itemize}
 | 
|
1171  | 
  \item The \alert{\bf Kleene Star} of a \underline{language}:
 | 
|
1172  | 
\bigskip  | 
|
1173  | 
||
1174  | 
  \begin{center}
 | 
|
1175  | 
  \begin{tabular}{c}
 | 
|
1176  | 
  \bl{$A\star \dn \bigcup_{0\le n} A^n$}
 | 
|
1177  | 
  \end{tabular}
 | 
|
1178  | 
  \end{center}\bigskip
 | 
|
1179  | 
||
1180  | 
\item[] This expands to  | 
|
1181  | 
||
1182  | 
\[  | 
|
1183  | 
  \bl{A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots}
 | 
|
1184  | 
\]  | 
|
1185  | 
||
1186  | 
or  | 
|
1187  | 
||
1188  | 
\small  | 
|
1189  | 
\[  | 
|
1190  | 
  \bl{\{[]\} \;\cup\; A \;\cup\; A\,@\,A \;\cup\; 
 | 
|
1191  | 
A\,@\,A\,@\,A \;\cup\; A\,@\,A\,@\,A\,@\,A \cup \ldots}  | 
|
1192  | 
\]  | 
|
1193  | 
||
1194  | 
  \end{itemize}  
 | 
|
1195  | 
||
1196  | 
  \end{frame}
 | 
|
1197  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 762 | 1198  | 
|
1199  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1200  | 
\begin{frame}[c]
 | 
|
1201  | 
\frametitle{The Meaning of a Regex}
 | 
|
1202  | 
||
1203  | 
\begin{textblock}{15}(1,4)
 | 
|
1204  | 
 \begin{tabular}{rcl}
 | 
|
1205  | 
 \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
 | 
|
1206  | 
 \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1207  | 
 \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
 | 
|
1208  | 
 \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
 | 
|
1209  | 
 \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
 | 
|
1210  | 
 \bl{$L(r^*)$}           & \bl{$\dn$} & \bl{$(L(r))\star$}\\
 | 
|
1211  | 
  \end{tabular}
 | 
|
| 631 | 1212  | 
|
| 762 | 1213  | 
\end{textblock}
 | 
1214  | 
||
1215  | 
\end{frame}
 | 
|
1216  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1217  | 
||
1218  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1219  | 
\begin{frame}[c]
 | 
|
1220  | 
\frametitle{The Meaning of Matching}
 | 
|
1221  | 
||
1222  | 
\begin{bubble}[10cm]
 | 
|
1223  | 
\large\bf  | 
|
1224  | 
A regular expression \bl{$r$} matches a string~\bl{$s$} 
 | 
|
1225  | 
provided  | 
|
1226  | 
||
1227  | 
\begin{center}
 | 
|
1228  | 
\bl{$s \in L(r)$}\\ 
 | 
|
1229  | 
\end{center}
 | 
|
1230  | 
\end{bubble}\bigskip\bigskip
 | 
|
1231  | 
||
1232  | 
\ldots and the point of the next lecture is  | 
|
1233  | 
to decide this problem as fast as possible (unlike Python,  | 
|
1234  | 
Ruby, Java)  | 
|
1235  | 
||
1236  | 
\end{frame}
 | 
|
1237  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1238  | 
||
1239  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1240  | 
\begin{frame}[c]
 | 
|
1241  | 
  \frametitle{Questions}
 | 
|
1242  | 
||
1243  | 
  \begin{itemize}
 | 
|
1244  | 
  \item Say \bl{$A = \{[a],[b],[c],[d]\}$}.\bigskip
 | 
|
1245  | 
||
1246  | 
\item[]  | 
|
1247  | 
  How many strings are in \bl{$A^4$}\,?
 | 
|
1248  | 
\bigskip\medskip\pause  | 
|
1249  | 
||
1250  | 
||
1251  | 
\item[]  | 
|
1252  | 
  What if \bl{$A = \{[a],[b],[c],[]\}$};\\ 
 | 
|
1253  | 
  how many strings are then in \bl{$A^4$}\,?
 | 
|
1254  | 
  \end{itemize}  
 | 
|
1255  | 
||
1256  | 
\end{frame}
 | 
|
1257  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1258  | 
||
1259  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1260  | 
% \begin{frame}[c]
 | 
|
1261  | 
% \frametitle{Languages (Sets of Strings)}
 | 
|
1262  | 
||
1263  | 
% \begin{itemize}
 | 
|
1264  | 
||
1265  | 
% \item A \alert{\bf Language} is a set of strings, for example\medskip
 | 
|
1266  | 
% \begin{center}
 | 
|
1267  | 
% \bl{$\{[], hello, foobar, a, abc\}$}
 | 
|
1268  | 
% \end{center}\bigskip
 | 
|
1269  | 
||
1270  | 
% \item \alert{\bf Concatenation} for strings and languages
 | 
|
1271  | 
||
1272  | 
% \begin{center}
 | 
|
1273  | 
% \begin{tabular}{rcl}
 | 
|
1274  | 
% \bl{$foo\;@\;bar$} & \bl{$=$} & \bl{$foobar$}\medskip\\
 | 
|
1275  | 
% \bl{$A\;@\;B$}     & \bl{$\dn$} & \bl{$\{ s_1\,@\,s_2 \;\mid\; s_1 \in A \wedge s_2 \in B\}$}
 | 
|
1276  | 
% \end{tabular}
 | 
|
1277  | 
% \end{center}
 | 
|
1278  | 
% \bigskip  | 
|
1279  | 
||
1280  | 
% \small  | 
|
1281  | 
% \item [] For example \bl{$A = \{foo, bar\}$}, \bl{$B = \{a, b\}$}
 | 
|
1282  | 
||
1283  | 
% \[  | 
|
1284  | 
% \bl{A \,@\, B = \{fooa, foob, bara, barb\}}
 | 
|
1285  | 
% \]  | 
|
1286  | 
||
1287  | 
||
1288  | 
||
1289  | 
||
1290  | 
% \end{itemize}  
 | 
|
1291  | 
% \end{frame}
 | 
|
1292  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1293  | 
||
1294  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1295  | 
% \begin{frame}[c]
 | 
|
1296  | 
%   \frametitle{Two Corner Cases}
 | 
|
1297  | 
||
1298  | 
% \Large  | 
|
1299  | 
%   \begin{center}
 | 
|
1300  | 
%   \bl{$A \,@\, \{[]\} = \;?$}\bigskip\bigskip\pause\\
 | 
|
1301  | 
%   \bl{$A \,@\, \{\} = \;?$}
 | 
|
1302  | 
%   \end{center}  
 | 
|
1303  | 
||
1304  | 
%   \end{frame}
 | 
|
1305  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1306  | 
||
1307  | 
||
1308  | 
||
1309  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1310  | 
% \begin{frame}[c]
 | 
|
1311  | 
% \frametitle{The Meaning of a Regex}
 | 
|
1312  | 
||
1313  | 
% ...all the strings a regular expression can match.  | 
|
1314  | 
||
1315  | 
% \begin{center}
 | 
|
1316  | 
%  \begin{tabular}{rcl}
 | 
|
1317  | 
%  \bl{$L(\ZERO)$}  & \bl{$\dn$} & \bl{$\{\}$}\\
 | 
|
1318  | 
%  \bl{$L(\ONE)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1319  | 
%  \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
 | 
|
1320  | 
%  \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
 | 
|
1321  | 
%  \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \,@\, L(r_2)$}\\
 | 
|
1322  | 
%  \bl{$L(r^*)$}           & \bl{$\dn$} & \\
 | 
|
1323  | 
%   \end{tabular}
 | 
|
1324  | 
% \end{center}
 | 
|
1325  | 
||
1326  | 
% \begin{textblock}{14}(1.5,13.5)\small
 | 
|
1327  | 
% \bl{$L$} is a function from regular expressions to 
 | 
|
1328  | 
% sets of strings (languages):\smallskip\\  | 
|
1329  | 
% \bl{\quad$L$ : Rexp $\Rightarrow$ Set$[$String$]$}
 | 
|
1330  | 
% \end{textblock}
 | 
|
1331  | 
||
1332  | 
% \end{frame}
 | 
|
1333  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1334  | 
||
1335  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1336  | 
% \begin{frame}[c]
 | 
|
1337  | 
% \frametitle{The Power Operation}
 | 
|
1338  | 
||
1339  | 
% \begin{itemize}
 | 
|
1340  | 
% \item The \alert{\textbf{\boldmath$n$th Power}} of a language:
 | 
|
1341  | 
||
1342  | 
% \begin{center}
 | 
|
1343  | 
% \begin{tabular}{lcl}
 | 
|
1344  | 
% \bl{$A^0$}    & \bl{$\dn$} & \bl{$\{[]\}$}\\
 | 
|
1345  | 
% \bl{$A^{n+1}$} & \bl{$\dn$} & \bl{$A \,@\, A^n$}
 | 
|
1346  | 
% \end{tabular}
 | 
|
1347  | 
% \end{center}\bigskip
 | 
|
1348  | 
||
1349  | 
% \item[] For example  | 
|
1350  | 
||
1351  | 
% \begin{center}
 | 
|
1352  | 
% \begin{tabular}{lcl@{\hspace{10mm}}l}
 | 
|
1353  | 
% \bl{$A^4$} & \bl{$=$} & \bl{$A \,@\, A \,@\, A \,@\, A$} & \bl{$(@\,\{[]\})$}\\
 | 
|
1354  | 
% \bl{$A^1$} & \bl{$=$} & \bl{$A$} & \bl{$(@\,\{[]\})$}\\
 | 
|
1355  | 
% \bl{$A^0$} & \bl{$=$} & \bl{$\{[]\}$}\\
 | 
|
1356  | 
% \end{tabular}
 | 
|
1357  | 
% \end{center}
 | 
|
1358  | 
||
1359  | 
% \end{itemize}  
 | 
|
1360  | 
||
1361  | 
% \end{frame}
 | 
|
1362  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1363  | 
||
1364  | 
||
| 631 | 1365  | 
|
| 0 | 1366  | 
|
1367  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
| 762 | 1368  | 
% \begin{frame}[c]
 | 
1369  | 
% \frametitle{Written Exam}
 | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1370  | 
|
| 762 | 1371  | 
% \begin{itemize}
 | 
1372  | 
% \item Accounts for 80\%.\bigskip  | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1373  | 
|
| 762 | 1374  | 
% \item The question ``\textit{Is this relevant for
 | 
1375  | 
% the exam?}'' is very demotivating for the lecturer!\bigskip\\  | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1376  | 
|
| 762 | 1377  | 
% \item Deal: Whatever is in the homework (and is not marked  | 
1378  | 
%       ``\textit{optional}'') is relevant for the exam.\bigskip
 | 
|
| 
330
 
0806e45d873c
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
328 
diff
changeset
 | 
1379  | 
|
| 762 | 1380  | 
% \item Each lecture has also a handout. There are also handouts about  | 
1381  | 
% notation and Scala.  | 
|
1382  | 
% \end{itemize}
 | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1383  | 
|
| 762 | 1384  | 
% \end{frame}
 | 
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1385  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1386  | 
|
| 
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1387  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 762 | 1388  | 
% \begin{frame}[t]
 | 
1389  | 
% \frametitle{Coursework}
 | 
|
| 0 | 1390  | 
|
| 762 | 1391  | 
% \begin{itemize}
 | 
1392  | 
% \item Accounts for 20\%. Two strands. Choose \alert{\bf one}!\bigskip
 | 
|
1393  | 
% \end{itemize}
 | 
|
| 0 | 1394  | 
|
| 762 | 1395  | 
% \begin{columns}[t]
 | 
1396  | 
% \begin{column}{.5\textwidth}
 | 
|
1397  | 
% \underline{\bf Strand 1}\medskip
 | 
|
1398  | 
% \begin{itemize}
 | 
|
1399  | 
% \item 4 programming tasks:  | 
|
1400  | 
% \begin{itemize}
 | 
|
1401  | 
% \item matcher (4\%, 11.10.)  | 
|
1402  | 
% \item lexer (5\%, 04.11.)  | 
|
1403  | 
% \item parser (5\%, 22.11.)  | 
|
1404  | 
% \item compiler (6\%, 13.12.)  | 
|
1405  | 
% \end{itemize}
 | 
|
1406  | 
% \item in any lang.~you like,\\ but I want to see the\\ code  | 
|
1407  | 
% \end{itemize}
 | 
|
1408  | 
% \end{column}
 | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1409  | 
|
| 762 | 1410  | 
% \hspace{-45pt}\vrule{}\hspace{10pt}
 | 
1411  | 
% \begin{column}{.5\textwidth}
 | 
|
1412  | 
% \underline{\bf Strand 2}\smallskip\begin{itemize}
 | 
|
1413  | 
% \item one task: prove the correctness of a regular expression matcher in  | 
|
1414  | 
% the \underline{Isabelle} theorem prover
 | 
|
1415  | 
% \item 20\%, submission on~13.12.\hspace{-5mm}\mbox{}
 | 
|
1416  | 
% \end{itemize}
 | 
|
1417  | 
% \end{column}
 | 
|
1418  | 
% \end{columns}\medskip
 | 
|
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1419  | 
|
| 762 | 1420  | 
% \small  | 
1421  | 
% \begin{itemize}
 | 
|
1422  | 
% \item Solving more than one strand will {\bf not} give you more 
 | 
|
1423  | 
% marks.  | 
|
| 506 | 1424  | 
|
| 762 | 1425  | 
% \end{itemize}
 | 
| 
253
 
75c469893514
added coursework
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
252 
diff
changeset
 | 
1426  | 
|
| 762 | 1427  | 
% \end{frame}
 | 
| 0 | 1428  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1429  | 
||
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1430  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 745 | 1431  | 
%\begin{frame}[c]
 | 
1432  | 
%\frametitle{Lecture Capture}
 | 
|
1433  | 
%  | 
|
1434  | 
%\begin{itemize}
 | 
|
1435  | 
%\item Hope it works\ldots\pause actually no, it does not!\medskip\pause  | 
|
1436  | 
%\item It is important to use lecture capture wisely\\ (it is only the ``baseline''):  | 
|
1437  | 
%\begin{itemize}  
 | 
|
1438  | 
%\item Lecture recordings are a study and revision aid.  | 
|
1439  | 
%\item Statistically, there is a clear and direct link between attendance and  | 
|
1440  | 
% attainment: students who do not attend lectures, do less well in exams.  | 
|
1441  | 
%\end{itemize}
 | 
|
1442  | 
%  | 
|
1443  | 
%\item Attending a lecture is more than watching it online -- if you do not  | 
|
1444  | 
%attend, you miss out!  | 
|
1445  | 
%  | 
|
1446  | 
%\end{itemize}
 | 
|
1447  | 
%  | 
|
1448  | 
%\end{frame}
 | 
|
| 506 | 1449  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
1450  | 
||
1451  | 
||
1452  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1453  | 
\begin{frame}[c]
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1454  | 
\frametitle{\begin{tabular}{c}\\[3cm]\alert{Questions?}\end{tabular}}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1455  | 
|
| 769 | 1456  | 
|
1457  | 
\begin{tabular}{lll}
 | 
|
1458  | 
TAs: & Anton Luca-Dorin & (took the module last year)\\  | 
|
1459  | 
& Chengsong Tan & (PhD student working on derivatives)  | 
|
1460  | 
\end{tabular}  
 | 
|
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1461  | 
\mbox{}
 | 
| 
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1462  | 
\end{frame}
 | 
| 769 | 1463  | 
|
1464  | 
\begin{frame}[c]
 | 
|
1465  | 
\begin{mybox3}{Coursework}
 | 
|
1466  | 
Do we need to provide instructions on running the coursework files  | 
|
1467  | 
if we're using languages other than Scala? Thanks  | 
|
1468  | 
\end{mybox3}\pause
 | 
|
1469  | 
||
1470  | 
\begin{mybox2}{Zip-File for Coursework}
 | 
|
1471  | 
Please, please submit a zipfile that generates a subdirectory  | 
|
1472  | 
  \begin{center}
 | 
|
1473  | 
  \texttt{NameFamilyName}  
 | 
|
1474  | 
  \end{center}  
 | 
|
1475  | 
\end{mybox2}
 | 
|
1476  | 
\end{frame}
 | 
|
1477  | 
||
1478  | 
||
1479  | 
\begin{frame}[c]
 | 
|
1480  | 
\begin{mybox3}{Coursework}
 | 
|
1481  | 
What is the purpose of the workshop session on the timetable?  | 
|
1482  | 
||
1483  | 
Slightly confused about how to undertake cw1 and what exactly we  | 
|
1484  | 
should be implementing. This is more for clarification of the cw1  | 
|
1485  | 
structure, including the implementation and questions present in  | 
|
1486  | 
cw1.  | 
|
1487  | 
\end{mybox3}
 | 
|
1488  | 
\end{frame}
 | 
|
1489  | 
||
1490  | 
\begin{frame}[c]
 | 
|
1491  | 
\begin{mybox3}{What is the trick?}\small
 | 
|
1492  | 
What was the trick to improve the evil regular expressions matcher  | 
|
1493  | 
to have such good results compared to other programming languages?  | 
|
1494  | 
Is it working better on casual regular expressions (the ones that  | 
|
1495  | 
Python and Java handle pretty well), too? Or was it just optimised  | 
|
1496  | 
for these evil ones?  | 
|
1497  | 
\end{mybox3}
 | 
|
1498  | 
||
1499  | 
\begin{mybox3}{}\small
 | 
|
1500  | 
It was shown in the lectures that the pattern matching algorithms  | 
|
1501  | 
currently implemented in popular programming languages (Python, JS,  | 
|
1502  | 
Java, etc) are far slower than the algorithm we are going to be  | 
|
1503  | 
implementing in this module. My question is why do these programming  | 
|
1504  | 
languages not implement the algorithm that we are going to implement  | 
|
1505  | 
in this module?  | 
|
1506  | 
\end{mybox3}
 | 
|
1507  | 
\end{frame}
 | 
|
1508  | 
||
1509  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1510  | 
\begin{frame}[c]
 | 
|
1511  | 
  \frametitle{Thanks to Martin Mikusovic}
 | 
|
1512  | 
||
1513  | 
\bigskip  | 
|
1514  | 
\begin{center}
 | 
|
1515  | 
\begin{tikzpicture}
 | 
|
1516  | 
  \begin{axis}[
 | 
|
1517  | 
    xlabel={$n$},
 | 
|
1518  | 
    x label style={at={(1.05,0.0)}},
 | 
|
1519  | 
    ylabel={time in secs},
 | 
|
1520  | 
enlargelimits=false,  | 
|
1521  | 
    xtick={0,5,...,30},
 | 
|
1522  | 
xmax=33,  | 
|
1523  | 
ymax=35,  | 
|
1524  | 
    ytick={0,10,...,30},
 | 
|
1525  | 
scaled ticks=false,  | 
|
1526  | 
axis lines=left,  | 
|
1527  | 
width=9cm,  | 
|
1528  | 
height=5.5cm,  | 
|
1529  | 
    legend entries={Python, Java 8, JavaScript, Swift},  
 | 
|
1530  | 
legend pos=north west,  | 
|
1531  | 
legend cell align=left]  | 
|
1532  | 
\addplot[blue,mark=*, mark options={fill=white}] table {re-python2.data};
 | 
|
1533  | 
\addplot[cyan,mark=*, mark options={fill=white}] table {re-java.data};
 | 
|
1534  | 
\addplot[red,mark=*, mark options={fill=white}] table {re-js.data};
 | 
|
1535  | 
\addplot[magenta,mark=*, mark options={fill=white}] table {re-swift.data};
 | 
|
1536  | 
\end{axis}
 | 
|
1537  | 
\end{tikzpicture}
 | 
|
1538  | 
\end{center}
 | 
|
1539  | 
||
1540  | 
Regex: \bl{$(a^*)^* \cdot b$}
 | 
|
1541  | 
||
1542  | 
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
 | 
|
1543  | 
||
1544  | 
\end{frame}
 | 
|
1545  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1546  | 
||
1547  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1548  | 
\begin{frame}[c]
 | 
|
1549  | 
\frametitle{Same Example in Java 9+}
 | 
|
1550  | 
||
1551  | 
\begin{center}
 | 
|
1552  | 
\begin{tikzpicture}
 | 
|
1553  | 
  \begin{axis}[
 | 
|
1554  | 
    xlabel={$n$},
 | 
|
1555  | 
    x label style={at={(1.09,-0.15)}},
 | 
|
1556  | 
    ylabel={time in secs},
 | 
|
1557  | 
scaled x ticks=false,  | 
|
1558  | 
enlargelimits=false,  | 
|
1559  | 
xtick distance=10000,  | 
|
1560  | 
xmax=44000,  | 
|
1561  | 
    ytick={0,10,...,30}, 
 | 
|
1562  | 
ymax=35,  | 
|
1563  | 
axis lines=left,  | 
|
1564  | 
width=9cm,  | 
|
1565  | 
height=5cm,  | 
|
1566  | 
    legend entries={Java \liningnums{9}+},
 | 
|
1567  | 
legend pos=north west,  | 
|
1568  | 
legend cell align=left]  | 
|
1569  | 
\addplot[blue,mark=square*,mark options={fill=white}] table {re-java9.data};
 | 
|
1570  | 
\end{axis}
 | 
|
1571  | 
\end{tikzpicture}
 | 
|
1572  | 
\end{center}
 | 
|
1573  | 
||
1574  | 
Regex: \bl{$(a^*)^* \cdot b$}
 | 
|
1575  | 
||
1576  | 
Strings of the form \bl{$\underbrace{\,a\ldots a\,}_{n}$}
 | 
|
1577  | 
||
1578  | 
\end{frame}
 | 
|
1579  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
|
1580  | 
||
1581  | 
\begin{frame}[c]
 | 
|
1582  | 
\begin{mybox3}{}
 | 
|
1583  | 
Are there any (common) languages that have a built-in regex  | 
|
1584  | 
implementation matching the set of functions of a formal `simple'  | 
|
1585  | 
regular expression, as opposed to an `extended' regular expression  | 
|
1586  | 
implemented in most regex-supporting languages?  | 
|
1587  | 
\end{mybox3}
 | 
|
1588  | 
\end{frame}
 | 
|
1589  | 
||
1590  | 
\begin{frame}[c]
 | 
|
1591  | 
\begin{mybox3}{Passing Mark}
 | 
|
1592  | 
I believe the assessment is 70\% coursework (broken into 10\% weekly  | 
|
1593  | 
stuff, 15\% mid term exam and 45\% CW in any programming language)  | 
|
1594  | 
and 30\% January exam. However, I would like to know if we just need  | 
|
1595  | 
40\% overall to pass the module or pass each component  | 
|
1596  | 
individually?  | 
|
1597  | 
\end{mybox3}
 | 
|
1598  | 
||
1599  | 
\hfill$\Rightarrow$ 40\% overall  | 
|
1600  | 
\end{frame}
 | 
|
1601  | 
||
1602  | 
\begin{frame}[c]
 | 
|
1603  | 
\begin{mybox3}{Regexes}
 | 
|
1604  | 
Can we determine all the possible regular expressions matching a  | 
|
1605  | 
certain string? If we take into account all the possible ways to  | 
|
1606  | 
  combine the operations: \bl{$\ZERO$}, \bl{$\ONE$},
 | 
|
1607  | 
  \bl{$r_1 + r_2$}, \bl{$r_1 \cdot r_2$}, \bl{$r^*$}?
 | 
|
1608  | 
\end{mybox3}
 | 
|
1609  | 
\end{frame}
 | 
|
1610  | 
||
1611  | 
\begin{frame}[c]
 | 
|
1612  | 
\begin{mybox3}{\bl{$L$} + Equivalence}
 | 
|
1613  | 
When we explain why two regular expressions are not equivalent, what  | 
|
1614  | 
method is better for us, using mathematics formulas or making an  | 
|
1615  | 
example?  | 
|
1616  | 
\end{mybox3}
 | 
|
1617  | 
\begin{mybox3}{}
 | 
|
1618  | 
Meaning of Regex and Operations  | 
|
1619  | 
\end{mybox3}
 | 
|
1620  | 
\end{frame}
 | 
|
1621  | 
||
1622  | 
\begin{frame}[c]
 | 
|
1623  | 
\begin{mybox3}{\bl{$L$}}
 | 
|
1624  | 
Can the function L be applied to anything other than regular  | 
|
1625  | 
expressions? For example would L(L(c)) return anything?  | 
|
1626  | 
\end{mybox3}
 | 
|
1627  | 
||
1628  | 
\hfill $\Rightarrow$ No  | 
|
1629  | 
\end{frame} 
 | 
|
1630  | 
||
1631  | 
\begin{frame}[c]
 | 
|
1632  | 
\begin{mybox3}{\bl{$(a?)\{n\} \cdot a\{n\}$}}
 | 
|
1633  | 
In the evil regexes section, is there any reason why in the regex  | 
|
1634  | 
  \texttt{[a?]\{n\}[a]\{n\}} the square brackets are used? It is defined as a
 | 
|
1635  | 
single character from the square brackets, however there is just one  | 
|
1636  | 
character, so it seems like it is not necessary. Maybe it is just  | 
|
1637  | 
necessary for the first part, because ? is a token instead of a  | 
|
1638  | 
character and we need to refer to a? as a ``unit''? Could regular  | 
|
1639  | 
brackets be used instead? Is there any difference apart from the  | 
|
1640  | 
fact that it would create a group? Also, are the regexes  | 
|
1641  | 
  \texttt{[a?]\{n\}} and
 | 
|
1642  | 
  \texttt{a\{0,3\}} equivalent?
 | 
|
1643  | 
\end{mybox3}
 | 
|
1644  | 
\end{frame} 
 | 
|
1645  | 
||
1646  | 
\begin{frame}[c]
 | 
|
1647  | 
\begin{mybox3}{Python + Parser Combinators (CW3)}\small
 | 
|
1648  | 
Hi Christian,  | 
|
1649  | 
||
1650  | 
I don’t see a problem: you certainly have higher order functions and  | 
|
1651  | 
it is easy to implement algebraic data types using classes. As far  | 
|
1652  | 
as I can see that’s all you need. You don’t get the static types but  | 
|
1653  | 
that should be obvious. Basically if you can do it in LISP you can  | 
|
1654  | 
do it in Python. The only problem could be stack overflows due to a  | 
|
1655  | 
lack of tail recursion optimisation. On the other hand you can  | 
|
1656  | 
simulate laziness using generators.  | 
|
1657  | 
||
1658  | 
Cheers,  | 
|
1659  | 
Thorsten  | 
|
1660  | 
\end{mybox3}
 | 
|
1661  | 
||
1662  | 
Trees \url{https://youtu.be/7tCNu4CnjVc}
 | 
|
1663  | 
||
1664  | 
Laziness \url{https://youtu.be/5jwV3zxXc8E}
 | 
|
1665  | 
||
1666  | 
\end{frame}
 | 
|
1667  | 
||
1668  | 
\begin{frame}[c]
 | 
|
1669  | 
\begin{mybox3}{}
 | 
|
1670  | 
What suggestions do you have for us to get the most out of this  | 
|
1671  | 
module, especially in the online format? I.e.\ form discussion  | 
|
1672  | 
groups, will you have office hours?  | 
|
1673  | 
\end{mybox3}
 | 
|
1674  | 
||
1675  | 
\small  | 
|
1676  | 
\hfill $\Rightarrow$\mbox{} Discussion Forum on KEATS
 | 
|
1677  | 
||
1678  | 
\hfill online tutorial sessions  | 
|
1679  | 
||
1680  | 
\hfill ???  | 
|
1681  | 
||
1682  | 
\hfill PL-groups for ``exotic'' langs  | 
|
1683  | 
\end{frame}
 | 
|
1684  | 
||
1685  | 
\begin{frame}[c]
 | 
|
1686  | 
\small  | 
|
1687  | 
\begin{mybox3}{}
 | 
|
1688  | 
Where do most students struggle with this module? What will the format  | 
|
1689  | 
of the exam be? What is the most efficient way of studying for the  | 
|
1690  | 
exam? There are plenty of resources available on KEATS, but is there  | 
|
1691  | 
anything else you'd recommend us to study? Although (just by skimming  | 
|
1692  | 
the headings) the module seems to be a combination of practical and  | 
|
1693  | 
theoretical matters, exactly in what field would the syllabus be  | 
|
1694  | 
applied? Besides these questions and the ones other students asked, is  | 
|
1695  | 
there anything else we should know? Thank you!  | 
|
1696  | 
\end{mybox3}
 | 
|
1697  | 
\end{frame}
 | 
|
1698  | 
||
1699  | 
||
1700  | 
\begin{frame}[c]
 | 
|
1701  | 
\end{frame}
 | 
|
1702  | 
||
1703  | 
\begin{frame}[c]
 | 
|
1704  | 
\end{frame}
 | 
|
1705  | 
||
1706  | 
\begin{frame}[c]
 | 
|
1707  | 
\end{frame}
 | 
|
1708  | 
||
1709  | 
\begin{frame}[c]
 | 
|
1710  | 
\end{frame}
 | 
|
1711  | 
||
1712  | 
\begin{frame}[c]
 | 
|
1713  | 
\end{frame}
 | 
|
1714  | 
||
| 
255
 
96a99237fa42
updated
 
Christian Urban <christian dot urban at kcl dot ac dot uk> 
parents: 
254 
diff
changeset
 | 
1715  | 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  | 
| 0 | 1716  | 
\end{document}
 | 
1717  | 
||
1718  | 
%%% Local Variables:  | 
|
1719  | 
%%% mode: latex  | 
|
1720  | 
%%% TeX-master: t  | 
|
1721  | 
%%% End:  | 
|
1722  |