--- a/langs.sty Sun Sep 21 17:40:04 2014 +0100
+++ b/langs.sty Sun Sep 21 23:23:43 2014 +0100
@@ -20,15 +20,15 @@
private,protected,requires,return,sealed,%
super,this,throw,trait,true,try,%
type,val,var,while,with,yield},%
- otherkeywords={=>,<-,<\%,<:,>:,\#,@},%
+ otherkeywords={=>,<-,<\%,<:,>:,\#},%
sensitive=true,%
%directives={Int,Char,Rexp,String,Boolean,BigInt,Unit,List,Set},%
%moredelim=*[directive]:,%
morecomment=[l]{//},%
morecomment=[n]{/*}{*/},
+ morestring=[s]{"""}{"""},
morestring=[b]",
morestring=[b]',
- morestring=[b]"""
}[keywords,comments,strings]
\lstdefinelanguage{While}{
@@ -41,7 +41,6 @@
\lstdefinestyle{mystyle}
{basicstyle=\ttfamily,
keywordstyle=\color{codepurple}\bfseries,
- %directivestyle=\color{codeblue}\bfseries,
stringstyle=\color{codegreen},
commentstyle=\color{codegreen},
morecomment=[s][\color{codedocblue}]{/**}{*/},
@@ -54,7 +53,8 @@
showstringspaces=false,
xleftmargin=8mm,
emphstyle=\color{codeblue}\bfseries,
- keepspaces}
+ keepspaces
+}
\lstset{language=Scala,
style=mystyle}
@@ -62,4 +62,5 @@
\newcommand{\code}[1]{{\lstinline{#1}}}
\newcommand{\pcode}[1]{\mbox{\lstset{language={},keywordstyle=\color{black}}\lstinline!#1!}}
+\newcommand{\scode}[1]{\mbox{\lstset{language={},basicstyle=\ttfamily\color{codegreen}}\lstinline!#1!}}
\makeatother
--- a/progs/app0.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app0.scala Sun Sep 21 23:23:43 2014 +0100
@@ -1,6 +1,7 @@
import io.Source
def get_page(url: String) : String = {
- Source.fromURL(url).take(10000).mkString
+ Source.fromURL(url).take(10000).mkString
+}
--- a/progs/app1.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app1.scala Sun Sep 21 23:23:43 2014 +0100
@@ -1,3 +1,4 @@
-def get_page(url: String) : String =
+def get_page(url: String) : String = {
Try(Source.fromURL(url).take(10000).mkString) getOrElse
{ println(s" Problem with: $url"); ""}
+}
--- a/progs/app2.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app2.scala Sun Sep 21 23:23:43 2014 +0100
@@ -1,17 +1,16 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
+val http_pattern = """"https?://[^"]*"""".r
def unquote(s: String) = s.drop(1).dropRight(1)
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] =
http_pattern.findAllIn(page).map(unquote).toSet
-}
def crawl(url: String, n: Int) : Unit = {
if (n == 0) ()
else {
println(s"Visiting: $n $url")
- for (u <- get_all_URLs(get_page(url)))
- crawl(u, n - 1)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
}
}
+crawl(some_start_URL, 2)
--- a/progs/app3.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app3.scala Sun Sep 21 23:23:43 2014 +0100
@@ -2,10 +2,12 @@
def crawl(url: String, n: Int) : Unit = {
if (n == 0) ()
- else if (my_urls.findFirstIn(url) == None) ()
+ else if (my_urls.findFirstIn(url) == None) {
+ println(s"Visiting: $n $url")
+ get_page(url); ()
+ }
else {
println(s"Visiting: $n $url")
- for (u <- get_all_URLs(get_page(url)))
- crawl(u, n - 1)
+ for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
}
}
--- a/progs/app4.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app4.scala Sun Sep 21 23:23:43 2014 +0100
@@ -1,15 +1,17 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
+val http_pattern = """"https?://[^"]*"""".r
val my_urls = """urbanc""".r
val email_pattern =
"""([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+def print_str(s: String) =
+ if (s == "") () else println(s)
+
def crawl(url: String, n: Int) : Unit = {
if (n == 0) ()
else {
println(s"Visiting: $n $url")
val page = get_page(url)
- println(email_pattern.findAllIn(page).mkString("\n"))
- for (u <- get_all_URLs(page))
- crawl(u, n - 1)
+ print_str(email_pattern.findAllIn(page).mkString("\n"))
+ for (u <- get_all_URLs(page).par) crawl(u, n - 1)
}
}
--- a/progs/crawler1.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler1.scala Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,9 @@
// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] =
http_pattern.findAllIn(page).map(unquote).toSet
-}
+
// naive version of crawl - searches until a given depth,
// visits pages potentially more than once
--- a/progs/crawler2.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler2.scala Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,8 @@
def unquote(s: String) = s.drop(1).dropRight(1)
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] =
http_pattern.findAllIn(page).map(unquote).toSet
-}
def crawl(url: String, n: Int) : Unit = {
if (n == 0) ()
--- a/progs/crawler3.scala Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler3.scala Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,8 @@
def unquote(s: String) = s.drop(1).dropRight(1)
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] =
http_pattern.findAllIn(page).map(unquote).toSet
-}
def print_str(s: String) =
if (s == "") () else println(s)
Binary file slides/slides01.pdf has changed
--- a/slides/slides01.tex Sun Sep 21 17:40:04 2014 +0100
+++ b/slides/slides01.tex Sun Sep 21 23:23:43 2014 +0100
@@ -6,6 +6,15 @@
\hfuzz=220pt
+%\setmonofont[Scale=.88]{Consolas}
+%\newfontfamily{\consolas}{Consolas}
+
+\lstset{language=Scala,
+ style=mystyle,
+ numbersep=0pt,
+ numbers=none,
+ xleftmargin=0mm}
+
\newcommand{\bl}[1]{\textcolor{blue}{#1}}
% beamer stuff
@@ -197,14 +206,19 @@
\begin{frame}[c]
\frametitle{Scala}
-\small a simple Scala function for reading webpages
+\small A simple Scala function for reading webpages:
+\smallskip
\footnotesize
-\lstinputlisting{../progs/app0.scala}\pause
-\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}\pause\bigskip
+\lstinputlisting{../progs/app0.scala}
+\medskip\pause
+
+\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}
+\bigskip\medskip\pause
-\small slightly more complicated for handling errors properly:
+\small A slightly more complicated version for handling errors properly:
+\smallskip
\footnotesize
\lstinputlisting{../progs/app1.scala}
@@ -286,40 +300,36 @@
\end{itemize}\bigskip
\begin{center}
-\only<1>{\code{"https?://[^\"]*"}}%
-\only<2>{\code{""""https?://[^\"]*"""".r}}
+\only<1>{\scode{"https?://[^"]*"}}%
+\only<2>{\scode{""""https?://[^"]*"""".r}}
\end{center}\bigskip\bigskip
-matches for example\\
-\code{"http://www.foobar.com"}\\
-\code{"https://www.tls.org"}\\
+matches for example\smallskip\\
+\hspace{2mm}\code{"http://www.foobar.com"}\\
+\hspace{2mm}\code{"https://www.tls.org"}\\
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
+\begin{frame}[t]
+\frametitle{Finding Operations}
-\code{rexp.findAllIn(string)}\medskip
+{\bf\code{rexp.findAllIn(string)}}\medskip
returns a list of all (sub)strings that match the
-regular expression\bigskip\bigskip
-
-\code{rexp.findFirstIn(string)}\medskip
-
-returns either \code{None} if no (sub)string matches
-or \code{Some(s)} with the first (sub)string
+regular expression
+\bigskip\bigskip
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+{\bf\code{rexp.findFirstIn(string)}}\medskip
+
+returns either
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-
-\footnotesize
-\lstinputlisting{../progs/app2.scala}\medskip
-
-\code{crawl(some_start_URL, 2)}\
+\begin{itemize}
+\item \code{None} if no (sub)string matches or
+\item \code{Some(s)} with the first (sub)string
+\end{itemize}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -328,8 +338,18 @@
\begin{frame}[c]
\footnotesize
-a version that only ``crawls'' links in my domain:
+\lstinputlisting{../progs/app2.scala}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\small
+A version that only crawls links in ``my'' domain:
+
+\footnotesize
\lstinputlisting{../progs/app3.scala}
\end{frame}
@@ -337,9 +357,9 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[c]
-
-\footnotesize
-a little email ``harvester'':
+\lstset{xleftmargin=-4mm}
+\small
+A little email harvester:
\footnotesize
\lstinputlisting{../progs/app4.scala}\bigskip
@@ -350,8 +370,6 @@
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[t]
\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}}
@@ -359,17 +377,17 @@
Their inductive definition:\medskip
\begin{textblock}{6}(2,5)
- \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
- \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\
- & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\
- & \bl{$\mid$} & \bl{c} & character\\
- & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\
- & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\
- & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\
+ \begin{tabular}{rrl@ {\hspace{13mm}}l}
+ \bl{$r$} & \bl{$::=$} & \bl{$\varnothing$} & null\\
+ & \bl{$\mid$} & \bl{$\epsilon$} & empty string / \pcode{""} / \pcode{[]}\\
+ & \bl{$\mid$} & \bl{$c$} & character\\
+ & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+ & \bl{$\mid$} & \bl{$r_1 + r_2$} & alternative / choice\\
+ & \bl{$\mid$} & \bl{$r^*$} & star (zero or more)\\
\end{tabular}
\end{textblock}
-\end{frame}}
+\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -377,7 +395,7 @@
\frametitle{Regular Expressions}
\small
-In Scala:
+In Scala:\bigskip
\footnotesize
\lstinputlisting{../progs/app51.scala}
@@ -393,21 +411,19 @@
Regular Expression\end{tabular}}
\begin{textblock}{15}(1,4)
- \begin{tabular}{@ {}rcl}
- \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\
- \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\
- \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\
- \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\
- \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$
- $L$(r$_2$) $\}$}\\
- \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\
+ \begin{tabular}{rcl}
+ \bl{$L(\varnothing)$} & \bl{$\dn$} & \bl{$\varnothing$}\\
+ \bl{$L(\epsilon)$} & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ \bl{$L(c)$} & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ \bl{$L(r_1 + r_2)$} & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
+ \bl{$L(r^*)$} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0} L(r)^n$}}\\
\end{tabular}\bigskip
\onslide<2->{
-\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\
-\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
-\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$
- $L$(r)$^n$ $\}$}}
+\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
+\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
+\small\hspace{5cm}\textcolor{gray}{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r) \wedge s_2 \in L(r)^n \}$}}
}
\end{textblock}
@@ -415,18 +431,20 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
\begin{frame}[c]
-\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}}
+\frametitle{The Meaning of Matching}
+\begin{bubble}[10cm]
\large
-a regular expression \bl{r} matches a string \bl{s} is defined as
+A regular expression \bl{$r$} matches a string \bl{$s$}
+provided
\begin{center}
-\bl{s $\in$ $L$(r)}\\
+\bl{$s \in L(r)$}\\
\end{center}
+\end{bubble}
-\end{frame}}
+\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%