updated
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Sun, 21 Sep 2014 23:23:43 +0100
changeset 254 dcd4688690ce
parent 253 75c469893514
child 255 96a99237fa42
updated
langs.sty
progs/app0.scala
progs/app1.scala
progs/app2.scala
progs/app3.scala
progs/app4.scala
progs/crawler1.scala
progs/crawler2.scala
progs/crawler3.scala
slides/slides01.pdf
slides/slides01.tex
--- a/langs.sty	Sun Sep 21 17:40:04 2014 +0100
+++ b/langs.sty	Sun Sep 21 23:23:43 2014 +0100
@@ -20,15 +20,15 @@
     private,protected,requires,return,sealed,%
     super,this,throw,trait,true,try,%
     type,val,var,while,with,yield},%
-  otherkeywords={=>,<-,<\%,<:,>:,\#,@},%
+  otherkeywords={=>,<-,<\%,<:,>:,\#},%
   sensitive=true,%
   %directives={Int,Char,Rexp,String,Boolean,BigInt,Unit,List,Set},%
   %moredelim=*[directive]:,%
   morecomment=[l]{//},%
   morecomment=[n]{/*}{*/},
+  morestring=[s]{"""}{"""},
   morestring=[b]",
   morestring=[b]',
-  morestring=[b]"""
 }[keywords,comments,strings]
 
 \lstdefinelanguage{While}{
@@ -41,7 +41,6 @@
 \lstdefinestyle{mystyle}
        {basicstyle=\ttfamily,
 	keywordstyle=\color{codepurple}\bfseries,
-        %directivestyle=\color{codeblue}\bfseries,
 	stringstyle=\color{codegreen},
 	commentstyle=\color{codegreen},
 	morecomment=[s][\color{codedocblue}]{/**}{*/},
@@ -54,7 +53,8 @@
 	showstringspaces=false,
         xleftmargin=8mm,
         emphstyle=\color{codeblue}\bfseries,
-        keepspaces}
+        keepspaces
+}
 
 \lstset{language=Scala,
         style=mystyle}
@@ -62,4 +62,5 @@
 
 \newcommand{\code}[1]{{\lstinline{#1}}}
 \newcommand{\pcode}[1]{\mbox{\lstset{language={},keywordstyle=\color{black}}\lstinline!#1!}}
+\newcommand{\scode}[1]{\mbox{\lstset{language={},basicstyle=\ttfamily\color{codegreen}}\lstinline!#1!}}
 \makeatother
--- a/progs/app0.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app0.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -1,6 +1,7 @@
 import io.Source
 
 def get_page(url: String) : String = { 
-  Source.fromURL(url).take(10000).mkString  
+  Source.fromURL(url).take(10000).mkString
+}  
 
 
--- a/progs/app1.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app1.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -1,3 +1,4 @@
-def get_page(url: String) : String = 
+def get_page(url: String) : String = {
   Try(Source.fromURL(url).take(10000).mkString) getOrElse 
     { println(s"  Problem with: $url"); ""}
+}
--- a/progs/app2.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app2.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -1,17 +1,16 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
+val http_pattern = """"https?://[^"]*"""".r
 
 def unquote(s: String) = s.drop(1).dropRight(1)
 
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] = 
   http_pattern.findAllIn(page).map(unquote).toSet
-}
 
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
   else {
     println(s"Visiting: $n $url")
-    for (u <- get_all_URLs(get_page(url))) 
-      crawl(u, n - 1)
+    for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
   }
 }
 
+crawl(some_start_URL, 2)
--- a/progs/app3.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app3.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -2,10 +2,12 @@
 
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
-  else if (my_urls.findFirstIn(url) == None) ()
+  else if (my_urls.findFirstIn(url) == None) { 
+    println(s"Visiting: $n $url")
+    get_page(url); () 
+  }
   else {
     println(s"Visiting: $n $url")
-    for (u <- get_all_URLs(get_page(url))) 
-      crawl(u, n - 1)
+    for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
   }
 }
--- a/progs/app4.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/app4.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -1,15 +1,17 @@
-val http_pattern = """\"https?://[^\"]*\"""".r
+val http_pattern = """"https?://[^"]*"""".r
 val my_urls = """urbanc""".r
 val email_pattern = 
   """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
 
+def print_str(s: String) = 
+  if (s == "") () else println(s)
+
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
   else {
     println(s"Visiting: $n $url")
     val page = get_page(url)
-    println(email_pattern.findAllIn(page).mkString("\n"))
-    for (u <- get_all_URLs(page)) 
-      crawl(u, n - 1)
+    print_str(email_pattern.findAllIn(page).mkString("\n"))
+    for (u <- get_all_URLs(page).par) crawl(u, n - 1)
   }
 }
--- a/progs/crawler1.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler1.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,9 @@
 // drops the first and last character from a string
 def unquote(s: String) = s.drop(1).dropRight(1)
 
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] = 
   http_pattern.findAllIn(page).map(unquote).toSet
-}
+
 
 // naive version of crawl - searches until a given depth,
 // visits pages potentially more than once
--- a/progs/crawler2.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler2.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,8 @@
 
 def unquote(s: String) = s.drop(1).dropRight(1)
 
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] = 
   http_pattern.findAllIn(page).map(unquote).toSet
-}
 
 def crawl(url: String, n: Int) : Unit = {
   if (n == 0) ()
--- a/progs/crawler3.scala	Sun Sep 21 17:40:04 2014 +0100
+++ b/progs/crawler3.scala	Sun Sep 21 23:23:43 2014 +0100
@@ -17,9 +17,8 @@
 
 def unquote(s: String) = s.drop(1).dropRight(1)
 
-def get_all_URLs(page: String) : Set[String] = {
+def get_all_URLs(page: String) : Set[String] = 
   http_pattern.findAllIn(page).map(unquote).toSet
-}
 
 def print_str(s: String) = 
   if (s == "") () else println(s)
Binary file slides/slides01.pdf has changed
--- a/slides/slides01.tex	Sun Sep 21 17:40:04 2014 +0100
+++ b/slides/slides01.tex	Sun Sep 21 23:23:43 2014 +0100
@@ -6,6 +6,15 @@
 
 \hfuzz=220pt 
 
+%\setmonofont[Scale=.88]{Consolas}
+%\newfontfamily{\consolas}{Consolas}
+
+\lstset{language=Scala,
+        style=mystyle,
+        numbersep=0pt,
+        numbers=none,
+        xleftmargin=0mm}
+
 \newcommand{\bl}[1]{\textcolor{blue}{#1}}     
 
 % beamer stuff 
@@ -197,14 +206,19 @@
 \begin{frame}[c]
 \frametitle{Scala}
 
-\small a simple Scala function for reading webpages
+\small A simple Scala function for reading webpages:
+\smallskip
 
 \footnotesize
-\lstinputlisting{../progs/app0.scala}\pause
-\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}\pause\bigskip
+\lstinputlisting{../progs/app0.scala}
+\medskip\pause
+
+\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}
+\bigskip\medskip\pause
 
 
-\small slightly more complicated for handling errors properly:
+\small A slightly more complicated version for handling errors properly:
+\smallskip
 
 \footnotesize
 \lstinputlisting{../progs/app1.scala}
@@ -286,40 +300,36 @@
 \end{itemize}\bigskip
   
 \begin{center}  
-\only<1>{\code{"https?://[^\"]*"}}%
-\only<2>{\code{""""https?://[^\"]*"""".r}}
+\only<1>{\scode{"https?://[^"]*"}}%
+\only<2>{\scode{""""https?://[^"]*"""".r}}
 \end{center}\bigskip\bigskip
 
-matches for example\\  
-\code{"http://www.foobar.com"}\\
-\code{"https://www.tls.org"}\\
+matches for example\smallskip\\  
+\hspace{2mm}\code{"http://www.foobar.com"}\\
+\hspace{2mm}\code{"https://www.tls.org"}\\
 
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
+\begin{frame}[t]
+\frametitle{Finding Operations}
 
-\code{rexp.findAllIn(string)}\medskip
+{\bf\code{rexp.findAllIn(string)}}\medskip
   
 returns a list of all (sub)strings that match the 
-regular expression\bigskip\bigskip  
-  
-\code{rexp.findFirstIn(string)}\medskip
-  
-returns either \code{None} if no (sub)string matches 
-or \code{Some(s)} with the first (sub)string
+regular expression
+\bigskip\bigskip  
   
-\end{frame}
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
+
+{\bf\code{rexp.findFirstIn(string)}}\medskip
+ 
+returns either 
 
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{frame}[c]
-
-\footnotesize
-\lstinputlisting{../progs/app2.scala}\medskip
-
-\code{crawl(some_start_URL, 2)}\
+\begin{itemize}
+\item \code{None} if no (sub)string matches or 
+\item \code{Some(s)} with the first (sub)string
+\end{itemize}
 
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
@@ -328,8 +338,18 @@
 \begin{frame}[c]
 
 \footnotesize
-a version that only ``crawls'' links in my domain:
+\lstinputlisting{../progs/app2.scala}
+
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{frame}[c]
+
+\small
+A version that only crawls links in ``my'' domain:
+
+\footnotesize
 \lstinputlisting{../progs/app3.scala}
 
 \end{frame}
@@ -337,9 +357,9 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[c]
-
-\footnotesize
-a little email ``harvester'':
+\lstset{xleftmargin=-4mm}
+\small
+A little email harvester:
 
 \footnotesize
 \lstinputlisting{../progs/app4.scala}\bigskip
@@ -350,8 +370,6 @@
 \end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
-
-  
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{frame}[t]
 \frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}}
@@ -359,17 +377,17 @@
 Their inductive definition:\medskip
 
 \begin{textblock}{6}(2,5)
-  \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l}
-  \bl{r} & \bl{$::=$}  & \bl{$\varnothing$}  & null\\
-         & \bl{$\mid$} & \bl{$\epsilon$}        & empty string / "" / []\\
-         & \bl{$\mid$} & \bl{c}                         & character\\
-         & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\
-         & \bl{$\mid$} & \bl{r$_1$ + r$_2$}  & alternative / choice\\
-         & \bl{$\mid$} & \bl{r$^*$}                   & star (zero or more)\\
+  \begin{tabular}{rrl@ {\hspace{13mm}}l}
+  \bl{$r$} & \bl{$::=$}  & \bl{$\varnothing$}   & null\\
+           & \bl{$\mid$} & \bl{$\epsilon$}      & empty string / \pcode{""} / \pcode{[]}\\
+           & \bl{$\mid$} & \bl{$c$}             & character\\
+           & \bl{$\mid$} & \bl{$r_1 \cdot r_2$} & sequence\\
+           & \bl{$\mid$} & \bl{$r_1 + r_2$}     & alternative / choice\\
+           & \bl{$\mid$} & \bl{$r^*$}           & star (zero or more)\\
   \end{tabular}
   \end{textblock}
   
-\end{frame}}
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -377,7 +395,7 @@
 \frametitle{Regular Expressions}
 
 \small
-In Scala:
+In Scala:\bigskip
 
 \footnotesize
 \lstinputlisting{../progs/app51.scala}
@@ -393,21 +411,19 @@
   Regular Expression\end{tabular}}
 
 \begin{textblock}{15}(1,4)
- \begin{tabular}{@ {}rcl}
- \bl{$L$($\varnothing$)}  & \bl{$\dn$} & \bl{$\varnothing$}\\
- \bl{$L$($\epsilon$)}        & \bl{$\dn$} & \bl{$\{$""$\}$}\\
- \bl{$L$(c)}                         & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\
- \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\
- \bl{$L$(r$_1$ $\cdot$ r$_2$)}  & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ 
-     $L$(r$_2$) $\}$}\\
- \bl{$L$(r$^*$)}                   & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\
+ \begin{tabular}{rcl}
+ \bl{$L(\varnothing)$}  & \bl{$\dn$} & \bl{$\varnothing$}\\
+ \bl{$L(\epsilon)$}     & \bl{$\dn$} & \bl{$\{[]\}$}\\
+ \bl{$L(c)$}            & \bl{$\dn$} & \bl{$\{[c]\}$}\\
+ \bl{$L(r_1 + r_2)$}    & \bl{$\dn$} & \bl{$L(r_1) \cup L(r_2)$}\\
+ \bl{$L(r_1 \cdot r_2)$} & \bl{$\dn$} & \bl{$\{ s_1 \,@\, s_2 \;|\; s_1 \in L(r_1) \wedge s_2 \in L(r_2) \}$}\\
+ \bl{$L(r^*)$}           & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0} L(r)^n$}}\\
   \end{tabular}\bigskip
   
 \onslide<2->{
-\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\
-\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
-\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ 
-     $L$(r)$^n$ $\}$}}
+\hspace{5mm}\bl{$L(r)^0 \;\dn\; \{[]\}$}\\
+\bl{$L(r)^{n+1} \;\dn\; L(r) \,@\, L(r)^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\
+\small\hspace{5cm}\textcolor{gray}{$\{ s_1 @ s_2 \;|\; s_1\in L(r) \wedge s_2 \in L(r)^n \}$}}
 }  
     \end{textblock}
 
@@ -415,18 +431,20 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\mode<presentation>{
 \begin{frame}[c]
-\frametitle{\begin{tabular}{c}The Meaning of Matching\end{tabular}}
+\frametitle{The Meaning of Matching}
 
+\begin{bubble}[10cm]
 \large
-a regular expression \bl{r} matches a string \bl{s} is defined as
+A regular expression \bl{$r$} matches a string \bl{$s$} 
+provided
 
 \begin{center}
-\bl{s $\in$ $L$(r)}\\ 
+\bl{$s \in L(r)$}\\ 
 \end{center}
+\end{bubble}
 
-\end{frame}}
+\end{frame}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%