284 \begin{itemize} |
298 \begin{itemize} |
285 \item \ldots{} is a pattern or template for specifying strings |
299 \item \ldots{} is a pattern or template for specifying strings |
286 \end{itemize}\bigskip |
300 \end{itemize}\bigskip |
287 |
301 |
288 \begin{center} |
302 \begin{center} |
289 \only<1>{\code{"https?://[^\"]*"}}% |
303 \only<1>{\scode{"https?://[^"]*"}}% |
290 \only<2>{\code{""""https?://[^\"]*"""".r}} |
304 \only<2>{\scode{""""https?://[^"]*"""".r}} |
291 \end{center}\bigskip\bigskip |
305 \end{center}\bigskip\bigskip |
292 |
306 |
293 matches for example\\ |
307 matches for example\smallskip\\ |
294 \code{"http://www.foobar.com"}\\ |
308 \hspace{2mm}\code{"http://www.foobar.com"}\\ |
295 \code{"https://www.tls.org"}\\ |
309 \hspace{2mm}\code{"https://www.tls.org"}\\ |
296 |
310 |
297 \end{frame} |
311 \end{frame} |
298 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
312 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
299 |
313 |
300 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
314 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
301 \begin{frame}[c] |
315 \begin{frame}[t] |
302 |
316 \frametitle{Finding Operations} |
303 \code{rexp.findAllIn(string)}\medskip |
317 |
|
318 {\bf\code{rexp.findAllIn(string)}}\medskip |
304 |
319 |
305 returns a list of all (sub)strings that match the |
320 returns a list of all (sub)strings that match the |
306 regular expression\bigskip\bigskip |
321 regular expression |
307 |
322 \bigskip\bigskip |
308 \code{rexp.findFirstIn(string)}\medskip |
323 |
309 |
324 |
310 returns either \code{None} if no (sub)string matches |
325 {\bf\code{rexp.findFirstIn(string)}}\medskip |
311 or \code{Some(s)} with the first (sub)string |
326 |
312 |
327 returns either |
|
328 |
|
329 \begin{itemize} |
|
330 \item \code{None} if no (sub)string matches or |
|
331 \item \code{Some(s)} with the first (sub)string |
|
332 \end{itemize} |
|
333 |
313 \end{frame} |
334 \end{frame} |
314 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
335 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
315 |
336 |
316 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
337 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
317 \begin{frame}[c] |
338 \begin{frame}[c] |
318 |
339 |
319 \footnotesize |
340 \footnotesize |
320 \lstinputlisting{../progs/app2.scala}\medskip |
341 \lstinputlisting{../progs/app2.scala} |
321 |
342 |
322 \code{crawl(some_start_URL, 2)}\ |
343 \end{frame} |
323 |
344 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
324 \end{frame} |
345 |
325 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
346 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
326 |
347 \begin{frame}[c] |
327 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
348 |
328 \begin{frame}[c] |
349 \small |
|
350 A version that only crawls links in ``my'' domain: |
329 |
351 |
330 \footnotesize |
352 \footnotesize |
331 A version that only ``crawls'' links in my domain: |
|
332 |
|
333 \lstinputlisting{../progs/app3.scala} |
353 \lstinputlisting{../progs/app3.scala} |
334 |
354 |
335 \end{frame} |
355 \end{frame} |
336 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
356 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
337 |
357 |
338 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
358 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
339 \begin{frame}[c] |
359 \begin{frame}[c] |
340 |
360 \lstset{xleftmargin=-4mm} |
341 \footnotesize |
361 \small |
342 A little email ``harvester'': |
362 A little email harvester: |
343 |
363 |
344 \footnotesize |
364 \footnotesize |
345 \lstinputlisting{../progs/app4.scala}\bigskip |
365 \lstinputlisting{../progs/app4.scala}\bigskip |
346 |
366 |
347 \tiny |
367 \tiny |
348 \url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/} |
368 \url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/} |
349 |
369 |
350 \end{frame} |
370 \end{frame} |
351 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
371 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
352 |
372 |
353 |
|
354 |
|
355 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
373 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
356 \begin{frame}[t] |
374 \begin{frame}[t] |
357 \frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} |
375 \frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} |
358 |
376 |
359 Their inductive definition:\medskip |
377 Their inductive definition:\medskip |
360 |
378 |
361 \begin{textblock}{6}(2,5) |
379 \begin{textblock}{6}(2,5) |
362 \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} |
380 \begin{tabular}{rrl@ {\hspace{13mm}}l} |
363 \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ |
381 \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$} & null\\ |
364 & \bl{$\mid$} & \bl{$\epsilon$} & empty string / \texttt{""} / \texttt{[]}\\ |
382 & \bl{$\mid$} & \bl{$\epsilon$} & empty string / \pcode{""} / \pcode{[]}\\ |
365 & \bl{$\mid$} & \bl{c} & character\\ |
383 & \bl{$\mid$} & \bl{$c$} & character\\ |
366 & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ |
384 & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ |
367 & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ |
385 & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ |
368 & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ |
386 & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ |
369 \end{tabular} |
387 \end{tabular} |
370 \end{textblock} |
388 \end{textblock} |
371 |
389 |
372 \end{frame} |
390 \end{frame} |
373 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
391 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
374 |
392 |
375 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
393 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
376 \begin{frame}[t] |
394 \begin{frame}[t] |
377 \frametitle{Regular Expressions} |
395 \frametitle{Regular Expressions} |
378 |
396 |
379 \small |
397 \small |
380 In Scala: |
398 In Scala:\bigskip |
381 |
399 |
382 \footnotesize |
400 \footnotesize |
383 \lstinputlisting{../progs/app51.scala} |
401 \lstinputlisting{../progs/app51.scala} |
384 |
402 |
385 |
403 |
391 \begin{frame}[c] |
409 \begin{frame}[c] |
392 \frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] |
410 \frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] |
393 Regular Expression\end{tabular}} |
411 Regular Expression\end{tabular}} |
394 |
412 |
395 \begin{textblock}{15}(1,4) |
413 \begin{textblock}{15}(1,4) |
396 \begin{tabular}{@ {}rcl} |
414 \begin{tabular}{rcl} |
397 \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ |
415 \bl{$L(\varnothing)$} & \bl{$\dn$} & \bl{$\varnothing$}\\ |
398 \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ |
416 \bl{$L(\epsilon)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ |
399 \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ |
417 \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ |
400 \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ |
418 \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ |
401 \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ |
419 \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\ |
402 $L$(r$_2$) $\}$}\\ |
420 \bl{$L(r^*)$} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0} L(r)^n$}}\\ |
403 \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\ |
|
404 \end{tabular}\bigskip |
421 \end{tabular}\bigskip |
405 |
422 |
406 \onslide<2->{ |
423 \onslide<2->{ |
407 \hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ |
424 \hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\ |
408 \bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ |
425 \bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ |
409 \small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ |
426 \small\hspace{5cm}\textcolor{gray}{$\{ s_1 \,@\, s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}} |
410 $L$(r)$^n$ $\}$}} |
|
411 } |
427 } |
412 \end{textblock} |
428 \end{textblock} |
413 |
429 |
414 \end{frame}} |
430 \end{frame}} |
415 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
431 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
416 |
432 |
417 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
433 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
418 \mode<presentation>{ |
434 \begin{frame}[c] |
419 \begin{frame}[c] |
435 \frametitle{The Meaning of Matching} |
420 \frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} |
436 |
421 |
437 \begin{bubble}[10cm] |
422 \large |
438 \large |
423 A regular expression \bl{r} matches a string \bl{s} if and only if |
439 A regular expression \bl{$r$} matches a string \bl{$s$} |
|
440 provided |
424 |
441 |
425 \begin{center} |
442 \begin{center} |
426 \bl{s $\in$ $L$(r)}\\ |
443 \bl{$s \in L(r)$}\\ |
427 \end{center} |
444 \end{center} |
428 |
445 \end{bubble} |
429 \end{frame}} |
446 |
|
447 \end{frame} |
430 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
448 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
431 |
449 |
432 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
450 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
433 \mode<presentation>{ |
451 \mode<presentation>{ |
434 \begin{frame}[c] |
452 \begin{frame}[c] |