# HG changeset patch # User Christian Urban # Date 1411338223 -3600 # Node ID dcd4688690ce32b001b01a117914d7f51b8cc70e # Parent 75c4698935142f7fff7fee6af7ca4c04129c14f8 updated diff -r 75c469893514 -r dcd4688690ce langs.sty --- a/langs.sty Sun Sep 21 17:40:04 2014 +0100 +++ b/langs.sty Sun Sep 21 23:23:43 2014 +0100 @@ -20,15 +20,15 @@ private,protected,requires,return,sealed,% super,this,throw,trait,true,try,% type,val,var,while,with,yield},% - otherkeywords={=>,<-,<\%,<:,>:,\#,@},% + otherkeywords={=>,<-,<\%,<:,>:,\#},% sensitive=true,% %directives={Int,Char,Rexp,String,Boolean,BigInt,Unit,List,Set},% %moredelim=*[directive]:,% morecomment=[l]{//},% morecomment=[n]{/*}{*/}, + morestring=[s]{"""}{"""}, morestring=[b]", morestring=[b]', - morestring=[b]""" }[keywords,comments,strings] \lstdefinelanguage{While}{ @@ -41,7 +41,6 @@ \lstdefinestyle{mystyle} {basicstyle=\ttfamily, keywordstyle=\color{codepurple}\bfseries, - %directivestyle=\color{codeblue}\bfseries, stringstyle=\color{codegreen}, commentstyle=\color{codegreen}, morecomment=[s][\color{codedocblue}]{/**}{*/}, @@ -54,7 +53,8 @@ showstringspaces=false, xleftmargin=8mm, emphstyle=\color{codeblue}\bfseries, - keepspaces} + keepspaces +} \lstset{language=Scala, style=mystyle} @@ -62,4 +62,5 @@ \newcommand{\code}[1]{{\lstinline{#1}}} \newcommand{\pcode}[1]{\mbox{\lstset{language={},keywordstyle=\color{black}}\lstinline!#1!}} +\newcommand{\scode}[1]{\mbox{\lstset{language={},basicstyle=\ttfamily\color{codegreen}}\lstinline!#1!}} \makeatother diff -r 75c469893514 -r dcd4688690ce progs/app0.scala --- a/progs/app0.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/app0.scala Sun Sep 21 23:23:43 2014 +0100 @@ -1,6 +1,7 @@ import io.Source def get_page(url: String) : String = { - Source.fromURL(url).take(10000).mkString + Source.fromURL(url).take(10000).mkString +} diff -r 75c469893514 -r dcd4688690ce progs/app1.scala --- a/progs/app1.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/app1.scala Sun Sep 21 23:23:43 2014 +0100 @@ -1,3 +1,4 @@ -def get_page(url: String) : String = +def get_page(url: String) : String = { Try(Source.fromURL(url).take(10000).mkString) getOrElse { println(s" Problem with: $url"); ""} +} diff -r 75c469893514 -r dcd4688690ce progs/app2.scala --- a/progs/app2.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/app2.scala Sun Sep 21 23:23:43 2014 +0100 @@ -1,17 +1,16 @@ -val http_pattern = """\"https?://[^\"]*\"""".r +val http_pattern = """"https?://[^"]*"""".r def unquote(s: String) = s.drop(1).dropRight(1) -def get_all_URLs(page: String) : Set[String] = { +def get_all_URLs(page: String) : Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -} def crawl(url: String, n: Int) : Unit = { if (n == 0) () else { println(s"Visiting: $n $url") - for (u <- get_all_URLs(get_page(url))) - crawl(u, n - 1) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) } } +crawl(some_start_URL, 2) diff -r 75c469893514 -r dcd4688690ce progs/app3.scala --- a/progs/app3.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/app3.scala Sun Sep 21 23:23:43 2014 +0100 @@ -2,10 +2,12 @@ def crawl(url: String, n: Int) : Unit = { if (n == 0) () - else if (my_urls.findFirstIn(url) == None) () + else if (my_urls.findFirstIn(url) == None) { + println(s"Visiting: $n $url") + get_page(url); () + } else { println(s"Visiting: $n $url") - for (u <- get_all_URLs(get_page(url))) - crawl(u, n - 1) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) } } diff -r 75c469893514 -r dcd4688690ce progs/app4.scala --- a/progs/app4.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/app4.scala Sun Sep 21 23:23:43 2014 +0100 @@ -1,15 +1,17 @@ -val http_pattern = """\"https?://[^\"]*\"""".r +val http_pattern = """"https?://[^"]*"""".r val my_urls = """urbanc""".r val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r +def print_str(s: String) = + if (s == "") () else println(s) + def crawl(url: String, n: Int) : Unit = { if (n == 0) () else { println(s"Visiting: $n $url") val page = get_page(url) - println(email_pattern.findAllIn(page).mkString("\n")) - for (u <- get_all_URLs(page)) - crawl(u, n - 1) + print_str(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page).par) crawl(u, n - 1) } } diff -r 75c469893514 -r dcd4688690ce progs/crawler1.scala --- a/progs/crawler1.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/crawler1.scala Sun Sep 21 23:23:43 2014 +0100 @@ -17,9 +17,9 @@ // drops the first and last character from a string def unquote(s: String) = s.drop(1).dropRight(1) -def get_all_URLs(page: String) : Set[String] = { +def get_all_URLs(page: String) : Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -} + // naive version of crawl - searches until a given depth, // visits pages potentially more than once diff -r 75c469893514 -r dcd4688690ce progs/crawler2.scala --- a/progs/crawler2.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/crawler2.scala Sun Sep 21 23:23:43 2014 +0100 @@ -17,9 +17,8 @@ def unquote(s: String) = s.drop(1).dropRight(1) -def get_all_URLs(page: String) : Set[String] = { +def get_all_URLs(page: String) : Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -} def crawl(url: String, n: Int) : Unit = { if (n == 0) () diff -r 75c469893514 -r dcd4688690ce progs/crawler3.scala --- a/progs/crawler3.scala Sun Sep 21 17:40:04 2014 +0100 +++ b/progs/crawler3.scala Sun Sep 21 23:23:43 2014 +0100 @@ -17,9 +17,8 @@ def unquote(s: String) = s.drop(1).dropRight(1) -def get_all_URLs(page: String) : Set[String] = { +def get_all_URLs(page: String) : Set[String] = http_pattern.findAllIn(page).map(unquote).toSet -} def print_str(s: String) = if (s == "") () else println(s) diff -r 75c469893514 -r dcd4688690ce slides/slides01.pdf Binary file slides/slides01.pdf has changed diff -r 75c469893514 -r dcd4688690ce slides/slides01.tex --- a/slides/slides01.tex Sun Sep 21 17:40:04 2014 +0100 +++ b/slides/slides01.tex Sun Sep 21 23:23:43 2014 +0100 @@ -6,6 +6,15 @@ \hfuzz=220pt +%\setmonofont[Scale=.88]{Consolas} +%\newfontfamily{\consolas}{Consolas} + +\lstset{language=Scala, + style=mystyle, + numbersep=0pt, + numbers=none, + xleftmargin=0mm} + \newcommand{\bl}[1]{\textcolor{blue}{#1}} % beamer stuff @@ -197,14 +206,19 @@ \begin{frame}[c] \frametitle{Scala} -\small a simple Scala function for reading webpages +\small A simple Scala function for reading webpages: +\smallskip \footnotesize -\lstinputlisting{../progs/app0.scala}\pause -\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}\pause\bigskip +\lstinputlisting{../progs/app0.scala} +\medskip\pause + +\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")} +\bigskip\medskip\pause -\small slightly more complicated for handling errors properly: +\small A slightly more complicated version for handling errors properly: +\smallskip \footnotesize \lstinputlisting{../progs/app1.scala} @@ -286,40 +300,36 @@ \end{itemize}\bigskip \begin{center} -\only<1>{\code{"https?://[^\"]*"}}% -\only<2>{\code{""""https?://[^\"]*"""".r}} +\only<1>{\scode{"https?://[^"]*"}}% +\only<2>{\scode{""""https?://[^"]*"""".r}} \end{center}\bigskip\bigskip -matches for example\\ -\code{"http://www.foobar.com"}\\ -\code{"https://www.tls.org"}\\ +matches for example\smallskip\\ +\hspace{2mm}\code{"http://www.foobar.com"}\\ +\hspace{2mm}\code{"https://www.tls.org"}\\ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] +\begin{frame}[t] +\frametitle{Finding Operations} -\code{rexp.findAllIn(string)}\medskip +{\bf\code{rexp.findAllIn(string)}}\medskip returns a list of all (sub)strings that match the -regular expression\bigskip\bigskip - -\code{rexp.findFirstIn(string)}\medskip - -returns either \code{None} if no (sub)string matches -or \code{Some(s)} with the first (sub)string +regular expression +\bigskip\bigskip -\end{frame} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +{\bf\code{rexp.findFirstIn(string)}}\medskip + +returns either -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\begin{frame}[c] - -\footnotesize -\lstinputlisting{../progs/app2.scala}\medskip - -\code{crawl(some_start_URL, 2)}\ +\begin{itemize} +\item \code{None} if no (sub)string matches or +\item \code{Some(s)} with the first (sub)string +\end{itemize} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -328,8 +338,18 @@ \begin{frame}[c] \footnotesize -a version that only ``crawls'' links in my domain: +\lstinputlisting{../progs/app2.scala} + +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[c] + +\small +A version that only crawls links in ``my'' domain: + +\footnotesize \lstinputlisting{../progs/app3.scala} \end{frame} @@ -337,9 +357,9 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[c] - -\footnotesize -a little email ``harvester'': +\lstset{xleftmargin=-4mm} +\small +A little email harvester: \footnotesize \lstinputlisting{../progs/app4.scala}\bigskip @@ -350,8 +370,6 @@ \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[t] \frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} @@ -359,17 +377,17 @@ Their inductive definition:\medskip \begin{textblock}{6}(2,5) - \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} - \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ - & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ - & \bl{$\mid$} & \bl{c} & character\\ - & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ - & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ - & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \begin{tabular}{rrl@ {\hspace{13mm}}l} + \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / \pcode{""} / \pcode{[]}\\ + & \bl{$\mid$} & \bl{$c$} & character\\ + & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\ + & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\ \end{tabular} \end{textblock} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -377,7 +395,7 @@ \frametitle{Regular Expressions} \small -In Scala: +In Scala:\bigskip \footnotesize \lstinputlisting{../progs/app51.scala} @@ -393,21 +411,19 @@ Regular Expression\end{tabular}} \begin{textblock}{15}(1,4) - \begin{tabular}{@ {}rcl} - \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ - \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ - \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ - \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ - \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ - $L$(r$_2$) $\}$}\\ - \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\ + \begin{tabular}{rcl} + \bl{$L(\varnothing)$} & \bl{$\dn$} & \bl{$\varnothing$}\\ + \bl{$L(\epsilon)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\ + \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\ + \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\ + \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\ + \bl{$L(r^*)$} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0} L(r)^n$}}\\ \end{tabular}\bigskip \onslide<2->{ -\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ -\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ -\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ - $L$(r)$^n$ $\}$}} +\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\ +\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ +\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}} } \end{textblock} @@ -415,18 +431,20 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ \begin{frame}[c] -\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}} +\frametitle{The Meaning of Matching} +\begin{bubble}[10cm] \large -a regular expression \bl{r} matches a string \bl{s} is defined as +A regular expression \bl{$r$} matches a string \bl{$s$} +provided \begin{center} -\bl{s $\in$ $L$(r)}\\ +\bl{$s \in L(r)$}\\ \end{center} +\end{bubble} -\end{frame}} +\end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%