47 \end{tabular} |
47 \end{tabular} |
48 \end{center} |
48 \end{center} |
49 |
49 |
50 |
50 |
51 \end{frame} |
51 \end{frame} |
52 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
52 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
53 |
|
54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
55 \begin{frame}[c] |
|
56 \frametitle{The Goal of this Course} |
|
57 |
|
58 \begin{center} |
|
59 \begin{tikzpicture}[scale=1, |
|
60 node/.style={ |
|
61 rectangle,rounded corners=3mm, |
|
62 very thick,draw=black!50,minimum height=18mm, minimum width=20mm, |
|
63 top color=white,bottom color=black!20}] |
|
64 |
|
65 \node at (3.05, 1.8) {\Large\bf A Compiler}; |
|
66 |
|
67 \node (0) at (-2.3,0) {}; |
|
68 |
|
69 \node (A) at (0,0) [node] {}; |
|
70 \node [below right] at (A.north west) {lexer}; |
|
71 |
|
72 \node (B) at (3,0) [node] {}; |
|
73 \node [below right=1mm] at (B.north west) {\mbox{}\hspace{-1mm}parser}; |
|
74 |
|
75 \node (C) at (6,0) [node] {}; |
|
76 \node [below right] at (C.north west) {\mbox{}\hspace{-1mm}code gen}; |
|
77 |
|
78 \node (1) at (8.4,0) {}; |
|
79 |
|
80 \draw [->,line width=4mm] (0) -- (A); |
|
81 \draw [->,line width=4mm] (A) -- (B); |
|
82 \draw [->,line width=4mm] (B) -- (C); |
|
83 \draw [->,line width=4mm] (C) -- (1); |
|
84 \end{tikzpicture} |
|
85 \end{center} |
|
86 |
|
87 \only<2,3>{ |
|
88 \begin{textblock}{1}(1,2) |
|
89 \begin{bubble}[9.8cm] |
|
90 \normalsize |
|
91 lexer input: a string\smallskip\\ |
|
92 \hspace{5mm}\code{"read(n);"}\medskip\\ |
|
93 lexer output: a sequence of tokens\smallskip\\ |
|
94 \hspace{5mm}\code{key(read); lpar; id(n); rpar; semi} |
|
95 \end{bubble} |
|
96 \end{textblock}} |
|
97 |
|
98 \only<3>{ |
|
99 \begin{textblock}{1}(6,7.8) |
|
100 \begin{tabular}{c} |
|
101 \includegraphics[scale=0.2]{../pics/rosetta.jpg}\\[-2mm] |
|
102 \footnotesize lexing $\Rightarrow$ recognising words (Stone of Rosetta) |
|
103 \end{tabular} |
|
104 \end{textblock}} |
|
105 |
|
106 \only<4>{ |
|
107 \begin{textblock}{1}(1,1.5) |
|
108 \begin{bubble}[8.5cm] |
|
109 \normalsize |
|
110 parser input: a sequence of token\smallskip\\ |
|
111 parser output: an abstract syntax tree\smallskip\\ |
|
112 \footnotesize |
|
113 \hspace{2cm}\begin{tikzpicture} |
|
114 \node {\code{read}} |
|
115 child {node {\code{lpar}}} |
|
116 child {node {\code{n}}} |
|
117 child {node {\code{rpar}}}; |
|
118 \end{tikzpicture} |
|
119 \end{bubble} |
|
120 \end{textblock}} |
|
121 |
|
122 \only<5,6>{ |
|
123 \begin{textblock}{1}(1,1.5) |
|
124 \begin{bubble}[4cm] |
|
125 \normalsize |
|
126 code generator:\smallskip\\ |
|
127 \hspace{5mm}\code{istore 2}\\ |
|
128 \hspace{5mm}\code{iload 2}\\ |
|
129 \hspace{5mm}\code{ldc 10}\\ |
|
130 \hspace{5mm}\code{isub}\\ |
|
131 \hspace{5mm}\code{ifeq Label2}\\ |
|
132 \hspace{5mm}\code{iload 2}\\ |
|
133 \hspace{5mm}\code{...}\\ |
|
134 \end{bubble} |
|
135 \end{textblock}} |
|
136 |
|
137 \only<6>{ |
|
138 \begin{textblock}{6}(8.4,7) |
|
139 \begin{bubble}[5cm] |
|
140 \mbox{\begin{tikzpicture}[scale=0.58,rounded corners=0mm] |
|
141 \begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, |
|
142 xlabel=n, |
|
143 enlargelimits=0.05, |
|
144 ybar interval=0.7, legend style=small] |
|
145 \addplot file {interpreted2.data}; |
|
146 \addplot file {compiled2.data}; |
|
147 %\legend{interpreted, compiled} |
|
148 \end{axis} |
|
149 \end{tikzpicture}} |
|
150 \end{bubble} |
|
151 \end{textblock}} |
|
152 |
|
153 \end{frame} |
|
154 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
155 |
|
156 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
157 \begin{frame}[c] |
|
158 \frametitle{The subject is quite old} |
|
159 |
|
160 \begin{itemize} |
|
161 \item Turing Machines, 1936 |
|
162 \item Regular Expressions, 1956\\ |
|
163 \item The first compiler for COBOL, 1957\\ (Grace Hopper) |
|
164 \item But surprisingly research papers are still published nowadays |
|
165 \end{itemize} |
|
166 |
|
167 \begin{flushright} |
|
168 \includegraphics[scale=0.3]{pics/hopper.jpg}\\ |
|
169 \footnotesize\textcolor{gray}{Grace Hopper} |
|
170 \end{flushright} |
|
171 |
|
172 \mbox{}\\[-10mm] |
|
173 {\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} |
|
174 |
|
175 \end{frame} |
|
176 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
177 |
|
178 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
179 \begin{frame}[c] |
|
180 \frametitle{Why Bother?} |
|
181 |
|
182 \begin{columns}[b] |
|
183 \begin{column}{.5\textwidth} |
|
184 Ruby, Python\\ and Others\bigskip\\ |
|
185 \begin{tikzpicture}[y=.08cm, x=.10cm] |
|
186 %axis |
|
187 \draw (0,0) -- coordinate (x axis mid) (30,0); |
|
188 \draw (0,0) -- coordinate (y axis mid) (0,30); |
|
189 %ticks |
|
190 \foreach \x in {0,5,...,30} |
|
191 \draw (\x,1pt) -- (\x,-3pt) node[anchor=north] {\footnotesize\x}; |
|
192 \foreach \y in {0,5,...,30} |
|
193 \draw (1pt,\y) -- (-3pt,\y) node[anchor=east] {\footnotesize\y}; |
|
194 %labels |
|
195 \node[below=0.6cm] at (x axis mid) {\footnotesize number of \texttt{a}s}; |
|
196 \node[rotate=90,left=1cm] at (y axis mid) {\footnotesize time in secs}; |
|
197 %plots |
|
198 \draw[color=blue] plot[mark=*] |
|
199 file {re-python.data}; |
|
200 \draw[color=brown] plot[mark=triangle*] |
|
201 file {re-ruby.data}; |
|
202 %legend |
|
203 \begin{scope}[shift={(4,20)}] |
|
204 \draw[color=blue] (0,0) -- |
|
205 plot[mark=*] (0.25,0) -- (0.5,0) |
|
206 node[right]{\small Python}; |
|
207 \draw[yshift=-\baselineskip, color=brown] (0,0) -- |
|
208 plot[mark=triangle*] (0.25,0) -- (0.5,0) |
|
209 node[right]{\small Ruby}; |
|
210 \end{scope} |
|
211 \end{tikzpicture} |
|
212 \end{column} |
|
213 \begin{column}{.5\textwidth} |
|
214 Us (after this course)\\\mbox{}\bigskip\\ |
|
215 \begin{tikzpicture}[y=.08cm, x=.0003cm] |
|
216 %axis |
|
217 \draw (0,0) -- coordinate (x axis mid) (12000,0); |
|
218 \draw (0,0) -- coordinate (y axis mid) (0,30); |
|
219 %ticks |
|
220 \foreach \x in {0,4000,...,12000} |
|
221 \draw (\x,1pt) -- (\x,-3pt) node[anchor=north] {\footnotesize\x}; |
|
222 \foreach \y in {0,5,...,30} |
|
223 \draw (1pt,\y) -- (-3pt,\y) node[anchor=east] {\footnotesize\y}; |
|
224 %labels |
|
225 \node[below=0.6cm] at (x axis mid) {\footnotesize number of \texttt{a}s}; |
|
226 \node[rotate=90, left=1cm] at (y axis mid) {\footnotesize time in secs}; |
|
227 |
|
228 %plots |
|
229 \draw[color=green] plot[mark=square*, mark options={fill=white} ] |
|
230 file {re2b.data}; |
|
231 \draw[color=black] plot[mark=square*, mark options={fill=white} ] |
|
232 file {re3.data}; |
|
233 \end{tikzpicture} |
|
234 \end{column} |
|
235 \end{columns}\bigskip\medskip |
|
236 |
|
237 \hspace{2cm}matching \texttt{[a?]\{n\}[a]\{n\}} against |
|
238 $\underbrace{\texttt{a}...\texttt{a}}_n$ |
|
239 \end{frame} |
|
240 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
241 |
|
242 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
243 \begin{frame}[c] |
|
244 \frametitle{Lectures 1 - 6} |
|
245 |
|
246 transforming strings into structured data\\[10mm] |
|
247 |
|
248 \alert<2>{\LARGE\bf Lexing} \onslide<2>{\hfill{}baseed on regular expressions}\medskip\\ |
|
249 \hspace{5mm}(recognising ``words'')\\[6mm] |
|
250 |
|
251 {\LARGE\bf Parsing}\medskip\\ |
|
252 \hspace{5mm}(recognising ``sentences'') |
|
253 |
|
254 \begin{textblock}{1}(10,9.1) |
|
255 \begin{tabular}{c} |
|
256 \includegraphics[scale=0.1]{../pics/rosetta.jpg}\\[-2mm] |
|
257 \footnotesize Stone of Rosetta |
|
258 \end{tabular} |
|
259 \end{textblock} |
|
260 |
|
261 \end{frame} |
|
262 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
263 |
|
264 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
265 \begin{frame}[t] |
|
266 \frametitle{Familiar Regular Expr.} |
|
267 \small |
|
268 \begin{center} |
|
269 \texttt{[a-z0-9\_.-]+ @ [a-z0-9.-]+ . [a-z.]\{2,6\}} |
|
270 \end{center}\smallskip |
|
271 |
|
272 \begin{center} |
|
273 \begin{tabular}{@{}lp{8.5cm}@{}} |
|
274 \pcode{re*} & matches 0 or more times\\ |
|
275 \pcode{re+} & matches 1 or more times\\ |
|
276 \pcode{re?} & matches 0 or 1 times\\ |
|
277 \pcode{re\{n\}} & matches exactly \pcode{n} number of times\\ |
|
278 \pcode{re\{n,m\}} & matches at least \pcode{n} and at most {\tt m} times\\ |
|
279 \pcode{[...]} & matches any single character inside the brackets\\ |
|
280 \pcode{[^...]} & matches any single character not inside the |
|
281 brackets\\ |
|
282 \pcode{a-zA-Z} & character ranges\\ |
|
283 \pcode{\\d} & matches digits; equivalent to \pcode{[0-9]}\\ |
|
284 \pcode{.} & matches every character except newline\\ |
|
285 \pcode{(re)} & groups regular expressions and remembers |
|
286 the matched text |
|
287 \end{tabular} |
|
288 \end{center} |
|
289 |
|
290 |
|
291 \end{frame} |
|
292 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
293 |
|
294 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
295 \begin{frame}[c] |
|
296 \frametitle{Today} |
|
297 |
|
298 \begin{itemize} |
|
299 \item the ultimate goal is to implement a small compiler |
|
300 (a really small one for the JVM)\bigskip |
|
301 \end{itemize} |
|
302 |
|
303 Let's start with: |
|
304 |
|
305 \begin{itemize} |
|
306 \item a web-crawler |
|
307 \item an email harvester |
|
308 \item a web-scraper |
|
309 \end{itemize} |
|
310 |
|
311 \end{frame} |
|
312 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
313 |
|
314 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
315 \begin{frame}[t] |
|
316 \frametitle{A Web-Crawler} |
|
317 |
|
318 \mbox{}\\[10mm] |
|
319 |
|
320 \begin{enumerate} |
|
321 \item given an URL, read the corresponding webpage |
|
322 \item extract all links from it |
|
323 \item call the web-crawler again for all these links |
|
324 \end{enumerate} |
|
325 |
|
326 \end{frame} |
|
327 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
328 |
|
329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
330 \mode<presentation>{ |
|
331 \begin{frame}[t] |
|
332 \frametitle{A Web-Crawler} |
|
333 |
|
334 \mbox{}\\[10mm] |
|
335 |
|
336 |
|
337 \begin{enumerate} |
|
338 \item given an URL, read the corresponding webpage |
|
339 \item if not possible print, out a problem |
|
340 \item if possible, extract all links from it |
|
341 \item call the web-crawler again for all these links |
|
342 \end{enumerate}\bigskip\pause |
|
343 |
|
344 \small (we need a bound for the number of recursive calls) |
|
345 |
|
346 \small (the purpose is to check all links on my own webpage) |
|
347 \end{frame}} |
|
348 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
349 |
53 |
350 |
54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
351 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
55 \begin{frame}[c] |
352 \begin{frame}[c] |
56 |
353 |
57 \begin{textblock}{1}(2,5) |
354 \begin{textblock}{1}(2,5) |
83 \begin{tabular}{c} |
380 \begin{tabular}{c} |
84 \includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm] |
381 \includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm] |
85 \small Browser |
382 \small Browser |
86 \end{tabular} |
383 \end{tabular} |
87 \end{textblock} |
384 \end{textblock} |
88 |
385 \end{frame} |
89 \only<2>{ |
386 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
90 \begin{textblock}{10}(2,13.5) |
387 |
91 \begin{itemize} |
388 |
92 \item programming languages, compilers |
|
93 \end{itemize} |
|
94 \end{textblock}} |
|
95 |
|
96 \end{frame} |
|
97 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
98 |
|
99 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
100 \begin{frame}[c] |
|
101 |
|
102 transforming strings into structured data\\[10mm] |
|
103 |
|
104 {\LARGE\bf Lexing}\medskip\\ |
|
105 \hspace{5mm}(recognising ``words'')\\[6mm] |
|
106 |
|
107 {\LARGE\bf Parsing}\medskip\\ |
|
108 \hspace{5mm}(recognising ``sentences'') |
|
109 |
|
110 \end{frame} |
|
111 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
112 |
|
113 |
|
114 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
115 \begin{frame}[c] |
|
116 |
|
117 The subject is quite old: |
|
118 |
|
119 \begin{itemize} |
|
120 \item Turing Machines, 1936 |
|
121 \item first compiler for COBOL, 1957 (Grace Hopper) |
|
122 \item but surprisingly research papers are still published now |
|
123 \end{itemize} |
|
124 |
|
125 \begin{flushright} |
|
126 \includegraphics[scale=0.3]{pics/hopper.jpg}\\ |
|
127 \footnotesize\textcolor{gray}{Grace Hopper} |
|
128 \end{flushright} |
|
129 |
|
130 {\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} |
|
131 |
|
132 \end{frame} |
|
133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
134 |
|
135 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
136 \mode<presentation>{ |
|
137 \begin{frame}[c] |
|
138 \frametitle{\begin{tabular}{c}This Course\end{tabular}} |
|
139 |
|
140 \begin{itemize} |
|
141 \item the ultimate goal is to implement a small compiler (a really small one for the JVM)\bigskip |
|
142 \end{itemize} |
|
143 |
|
144 Let's start with: |
|
145 |
|
146 \begin{itemize} |
|
147 \item a web-crawler |
|
148 \item an email harvester |
|
149 \item a web-scraper |
|
150 \end{itemize} |
|
151 |
|
152 \begin{textblock}{6}(10,7) |
|
153 \begin{tikzpicture}[scale=0.38] |
|
154 \begin{axis}[axis x line=bottom, axis y line=left, ylabel=secs, |
|
155 xlabel=n, |
|
156 enlargelimits=0.05, |
|
157 ybar interval=0.7, legend style=small] |
|
158 \addplot file {interpreted2.data}; |
|
159 \addplot file {compiled2.data}; |
|
160 %\legend{interpreted, compiled} |
|
161 \end{axis} |
|
162 \end{tikzpicture} |
|
163 \end{textblock} |
|
164 |
|
165 |
|
166 \end{frame}} |
|
167 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
168 |
|
169 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
170 \begin{frame}[t] |
|
171 \frametitle{A Web-Crawler} |
|
172 |
|
173 \mbox{}\\[10mm] |
|
174 |
|
175 \begin{enumerate} |
|
176 \item given an URL, read the corresponding webpage |
|
177 \item extract all links from it |
|
178 \item call the web-crawler again for all these links |
|
179 \end{enumerate} |
|
180 |
|
181 \end{frame} |
|
182 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
183 |
|
184 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
185 \mode<presentation>{ |
|
186 \begin{frame}[t] |
|
187 \frametitle{A Web-Crawler} |
|
188 |
|
189 \mbox{}\\[10mm] |
|
190 |
|
191 |
|
192 \begin{enumerate} |
|
193 \item given an URL, read the corresponding webpage |
|
194 \item if not possible print, out a problem |
|
195 \item if possible, extract all links from it |
|
196 \item call the web-crawler again for all these links |
|
197 \end{enumerate}\bigskip\pause |
|
198 |
|
199 \small (we need a bound for the number of recursive calls) |
|
200 |
|
201 \small (the purpose is to check all links on my own webpage) |
|
202 \end{frame}} |
|
203 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|
204 |
389 |
205 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
206 \begin{frame}[c] |
391 \begin{frame}[c] |
207 \frametitle{Scala} |
392 \frametitle{Scala} |
208 |
393 |