# HG changeset patch # User Christian Urban # Date 1348743581 -3600 # Node ID 73cf4406b7738b510f15afe5d83bbff2f8b4ff8c # Parent 0da19c346e240418ab9946278562a61c9c43c939 updated diff -r 0da19c346e24 -r 73cf4406b773 app2.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app2.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,16 @@ +val http_pattern = """\"https?://[^\"]*\"""".r + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} + diff -r 0da19c346e24 -r 73cf4406b773 app3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app3.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,10 @@ +val my_urls = """urbanc""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1) + } +} diff -r 0da19c346e24 -r 73cf4406b773 app4.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app4.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,15 @@ +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = + """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + //else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} diff -r 0da19c346e24 -r 73cf4406b773 app5.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app5.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,8 @@ +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} diff -r 0da19c346e24 -r 73cf4406b773 app51.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app51.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,8 @@ +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp diff -r 0da19c346e24 -r 73cf4406b773 app6.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app6.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,11 @@ +def deriv (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) + else SEQ(deriv(r1, c), r2) + case STAR(r) => SEQ(deriv(r, c), STAR(r)) +} + diff -r 0da19c346e24 -r 73cf4406b773 app7.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/app7.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,17 @@ +def matches(r: Rexp, s: String) : Boolean = + nullable(derivs(r, s.toList)) + + +/* Examples */ + +println(matches(SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')),"cab")) +println(matches(STAR(CHAR('a')),"aaa")) + +/* Convenience using implicits */ +implicit def string2rexp(s : String) : Rexp = { + s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) ) +} + +println(matches("cab" ,"cab")) +println(matches(STAR("a"),"aaa")) +println(matches(STAR("a"),"aaab")) diff -r 0da19c346e24 -r 73cf4406b773 crawler1.scala --- a/crawler1.scala Wed Sep 26 08:49:47 2012 +0100 +++ b/crawler1.scala Thu Sep 27 11:59:41 2012 +0100 @@ -36,6 +36,8 @@ // staring URL for the crawler val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" +//val startURL = """http://www.inf.kcl.ac.uk/staff/mml/""" + // call on the command line crawl(startURL, 2) diff -r 0da19c346e24 -r 73cf4406b773 crawler2.scala --- a/crawler2.scala Wed Sep 26 08:49:47 2012 +0100 +++ b/crawler2.scala Thu Sep 27 11:59:41 2012 +0100 @@ -40,5 +40,5 @@ // can now deal with depth 3 // start on command line -crawl(startURL, 3) +crawl(startURL, 4) diff -r 0da19c346e24 -r 73cf4406b773 crawler3.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crawler3.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,49 @@ +import io.Source +import scala.util.matching.Regex + +// gets the first ~10K of a page +def get_page(url: String) : String = { + try { + Source.fromURL(url).take(10000).mkString + } + catch { + case e => { + println(" Problem with: " + url) + "" + } + } +} + +// staring URL for the crawler +val startURL = """http://www.inf.kcl.ac.uk/staff/urbanc/""" + +// regex for URLs +val http_pattern = """\"https?://[^\"]*\"""".r +val my_urls = """urbanc""".r +val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r + +// http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/ + +def unquote(s: String) = s.drop(1).dropRight(1) + +def get_all_URLs(page: String) : Set[String] = { + (http_pattern.findAllIn(page)).map { unquote(_) }.toSet +} + +// naive version - seraches until a given depth +// visits pages potentially more than once +def crawl(url: String, n: Int) : Unit = { + if (n == 0) () + //else if (my_urls.findFirstIn(url) == None) () + else { + println("Visiting: " + n + " " + url) + val page = get_page(url) + println(email_pattern.findAllIn(page).mkString("\n")) + for (u <- get_all_URLs(page)) crawl(u, n - 1) + } +} + +// can now deal with depth 3 +// start on command line +crawl(startURL, 3) + diff -r 0da19c346e24 -r 73cf4406b773 regexp.scala --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexp.scala Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,52 @@ +abstract class Rexp + +case object NULL extends Rexp +case object EMPTY extends Rexp +case class CHAR(c: Char) extends Rexp +case class ALT(r1: Rexp, r2: Rexp) extends Rexp +case class SEQ(r1: Rexp, r2: Rexp) extends Rexp +case class STAR(r: Rexp) extends Rexp + +// whether it can match the empty string +def nullable (r: Rexp) : Boolean = r match { + case NULL => false + case EMPTY => true + case CHAR(_) => false + case ALT(r1, r2) => nullable(r1) || nullable(r2) + case SEQ(r1, r2) => nullable(r1) && nullable(r2) + case STAR(_) => true +} + +// derivative of a regular expression +def deriv (r: Rexp, c: Char) : Rexp = r match { + case NULL => NULL + case EMPTY => NULL + case CHAR(d) => if (c == d) EMPTY else NULL + case ALT(r1, r2) => ALT(deriv(r1, c), deriv(r2, c)) + case SEQ(r1, r2) => + if (nullable(r1)) ALT(SEQ(deriv(r1, c), r2), deriv(r2, c)) + else SEQ(deriv(r1, c), r2) + case STAR(r) => SEQ(deriv(r, c), STAR(r)) +} + +def derivs (r: Rexp, s: List[Char]) : Rexp = s match { + case Nil => r + case c::cs => derivs(deriv(r, c), cs) +} + +// regular expression matching +def matches(r: Rexp, s: String) : Boolean = nullable(derivs(r, s.toList)) + +/* Examples */ + +println(matches(SEQ(SEQ(CHAR('c'), CHAR('a')), CHAR('b')),"cab")) +println(matches(STAR(CHAR('a')),"aaa")) + +/* Convenience using implicits */ +implicit def string2rexp(s : String) : Rexp = { + s.foldRight (EMPTY: Rexp) ( (c, r) => SEQ(CHAR(c), r) ) +} + +println(matches("cab" ,"cab")) +println(matches(STAR("a"),"aaa")) +println(matches(STAR("a"),"aaab")) diff -r 0da19c346e24 -r 73cf4406b773 scraper.scala --- a/scraper.scala Wed Sep 26 08:49:47 2012 +0100 +++ b/scraper.scala Thu Sep 27 11:59:41 2012 +0100 @@ -21,14 +21,14 @@ //receiving data val page = fromInputStream(conn.getInputStream).getLines.mkString("\n") -println(page) +//println(page) // regular expression . excludes newlines, // therefore we have to use [\S\s] val regex1 = """[\S\s]*?""".r val rows = regex1.findAllIn(page).toList -print(rows) +//print(rows) val regex2 = """([\S\s]*?)""".r diff -r 0da19c346e24 -r 73cf4406b773 slides01.pdf Binary file slides01.pdf has changed diff -r 0da19c346e24 -r 73cf4406b773 slides01.tex --- a/slides01.tex Wed Sep 26 08:49:47 2012 +0100 +++ b/slides01.tex Thu Sep 27 11:59:41 2012 +0100 @@ -382,7 +382,7 @@ \frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} {\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app4.scala}}} +\texttt{\lstinputlisting{app51.scala}}} \end{frame}} @@ -431,103 +431,20 @@ \end{frame}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Nullability\end{tabular}} - -\small -whether a regular expression matches the empty string:\medskip - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app5.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}Derivative of a Rexp\end{tabular}} - -\large -If \bl{r} matches the string \bl{c::s}, what is a regular expression that matches \bl{s}?\bigskip\bigskip\bigskip\bigskip - -\small -\bl{der c r} gives the answer -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} - -\begin{center} -\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} - \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ - \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then [] else $\varnothing$} & \\ - \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ - \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{((der c r$_1$) $\cdot$ r$_2$) + } & \\ - & & \bl{\hspace{3mm}(if nullable r$_1$ then der c r$_2$ else $\varnothing$)}\\ - \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)} &\smallskip\\\pause - - \bl{ders [] r} & \bl{$\dn$} & \bl{r} & \\ - \bl{ders (c::s) r} & \bl{$\dn$} & \bl{ders s (der c r)} & \\ - \end{tabular} -\end{center} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app6.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[c] -\frametitle{\begin{tabular}{c}The Rexp Matcher\end{tabular}} - - -{\lstset{language=Scala}\fontsize{8}{10}\selectfont -\texttt{\lstinputlisting{app7.scala}}} - - - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \mode{ \begin{frame}[c] \frametitle{\begin{tabular}{c}This Course\end{tabular}} -We will have a look at +We will have a look at: \begin{itemize} -\item regular expression / regular expression matching -\item a bit of sets (of strings) +\item regular expressions / regular expression matching \item automata \item the Myhill-Nerode theorem \item parsing \item grammars -\item a small interpreter / webbrowser +\item a small interpreter / web browser \end{itemize} \end{frame}} @@ -541,7 +458,7 @@ \frametitle{\begin{tabular}{c}Exam\end{tabular}} \begin{itemize} -\item The question ``Is this relevant for the exams?'' is not appreciated!\bigskip\\ +\item The question ``Is this relevant for the exam?'' is not appreciated!\bigskip\\ Whatever is in the homework sheets (and is not marked optional) is relevant for the exam.\\ No code needs to be written. @@ -551,30 +468,6 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\mode{ -\begin{frame}[t] -\frametitle{\begin{tabular}{c}Maps in Scala\end{tabular}} - -\begin{itemize} -\item {\bf\texttt{map}} takes a function, say f, and applies it to every element of the list: -\end{itemize} - -\begin{textblock}{15}(2,7) -\fontsize{13}{14}\selectfont -\bf\texttt{List(1, 2, 3, 4, 5, 6, 7, 8, 9)} -\end{textblock} - -\begin{textblock}{15}(2,10) -\fontsize{13}{14}\selectfont -\bf\texttt{List(1, 4, 9, 16, 25, 36, 49, 64, 81)} -\end{textblock} - -\end{frame}} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - \end{document} %%% Local Variables: diff -r 0da19c346e24 -r 73cf4406b773 slides02.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/slides02.tex Thu Sep 27 11:59:41 2012 +0100 @@ -0,0 +1,583 @@ +\documentclass[dvipsnames,14pt,t]{beamer} +\usepackage{beamerthemeplainculight} +\usepackage[T1]{fontenc} +\usepackage[latin1]{inputenc} +\usepackage{mathpartir} +\usepackage[absolute,overlay]{textpos} +\usepackage{ifthen} +\usepackage{tikz} +\usepackage{pgf} +\usepackage{calc} +\usepackage{ulem} +\usepackage{courier} +\usepackage{listings} +\renewcommand{\uline}[1]{#1} +\usetikzlibrary{arrows} +\usetikzlibrary{automata} +\usetikzlibrary{shapes} +\usetikzlibrary{shadows} +\usetikzlibrary{positioning} +\usetikzlibrary{calc} +\usepackage{graphicx} + +\definecolor{javared}{rgb}{0.6,0,0} % for strings +\definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments +\definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords +\definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc + +\lstset{language=Java, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +\lstdefinelanguage{scala}{ + morekeywords={abstract,case,catch,class,def,% + do,else,extends,false,final,finally,% + for,if,implicit,import,match,mixin,% + new,null,object,override,package,% + private,protected,requires,return,sealed,% + super,this,throw,trait,true,try,% + type,val,var,while,with,yield}, + otherkeywords={=>,<-,<\%,<:,>:,\#,@}, + sensitive=true, + morecomment=[l]{//}, + morecomment=[n]{/*}{*/}, + morestring=[b]", + morestring=[b]', + morestring=[b]""" +} + +\lstset{language=Scala, + basicstyle=\ttfamily, + keywordstyle=\color{javapurple}\bfseries, + stringstyle=\color{javagreen}, + commentstyle=\color{javagreen}, + morecomment=[s][\color{javadocblue}]{/**}{*/}, + numbers=left, + numberstyle=\tiny\color{black}, + stepnumber=1, + numbersep=10pt, + tabsize=2, + showspaces=false, + showstringspaces=false} + +% beamer stuff +\renewcommand{\slidecaption}{AFL 01, King's College London, 26.~September 2012} + + +\begin{document} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}<1>[t] +\frametitle{% + \begin{tabular}{@ {}c@ {}} + \\[-3mm] + \LARGE Automata and \\[-2mm] + \LARGE Formal Languages (1)\\[-3mm] + \end{tabular}} + + \begin{center} + \includegraphics[scale=0.3]{pics/ante1.jpg}\hspace{5mm} + \includegraphics[scale=0.31]{pics/ante2.jpg}\\ + \footnotesize\textcolor{gray}{Antikythera automaton, 100 BC (Archimedes?)} + \end{center} + +\normalsize + \begin{center} + \begin{tabular}{ll} + Email: & christian.urban at kcl.ac.uk\\ + Of$\!$fice: & S1.27 (1st floor Strand Building)\\ + Slides: & KEATS + \end{tabular} + \end{center} + + +\end{frame}} + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\begin{textblock}{1}(2,5) +\begin{tabular}{c} +\includegraphics[scale=0.15]{pics/servers.png}\\[-2mm] +\small Server +\end{tabular} +\end{textblock} + +\begin{textblock}{1}(5.6,4) + \begin{tikzpicture}[scale=1.1] + \draw[white] (0,1) node (X) {}; + \draw[white] (2,1) node (Y) {}; + \draw[white] (0,0) node (X1) {}; + \draw[white] (2,0) node (Y1) {}; + \draw[white] (0,-1) node (X2) {}; + \draw[white] (2,-1) node (Y2) {}; + \draw[red, <-, line width = 2mm] (X) -- (Y); + \node [inner sep=5pt,label=above:\textcolor{black}{GET request}] at ($ (X)!.5!(Y) $) {}; + \draw[red, ->, line width = 2mm] (X1) -- (Y1); + \node [inner sep=5pt,label=above:\textcolor{black}{webpage}] at ($ (X1)!.5!(Y1) $) {}; + \draw[red, <-, line width = 2mm] (X2) -- (Y2); + \node [inner sep=7pt,label=above:\textcolor{black}{POST data}] at ($ (X2)!.5!(Y2) $) {}; + \end{tikzpicture} +\end{textblock} + + +\begin{textblock}{1}(9,5.5) +\begin{tabular}{c} +\includegraphics[scale=0.15]{pics/laptop.png}\\[-2mm] +\small Browser +\end{tabular} +\end{textblock} + +\only<2>{ +\begin{textblock}{10}(2,13.5) +\begin{itemize} +\item programming languages, compilers +\end{itemize} +\end{textblock}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +transforming strings into structured data\\[10mm] + +{\LARGE\bf Lexing}\medskip\\ +\hspace{5mm}(recognising ``words'')\\[6mm] + +{\LARGE\bf Parsing}\medskip\\ +\hspace{5mm}(recognising ``sentences'') + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +The subject is quite old: + +\begin{itemize} +\item Turing Machines, 1936 +\item first compiler for COBOL, 1957 (Grace Hopper) +\item but surprisingly research papers are still published now +\end{itemize} + +\begin{flushright} +\includegraphics[scale=0.3]{pics/hopper.jpg}\\ +\footnotesize\textcolor{gray}{Grace Hopper} +\end{flushright} + +{\footnotesize\textcolor{gray}{(she made it to David Letterman's Tonight Show, \url{http://www.youtube.com/watch?v=aZOxtURhfEU})}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}This Course\end{tabular}} + +\begin{itemize} +\item the ultimate goal is to implement a small web-browser (really small one)\bigskip +\end{itemize} + +Let's start with: + +\begin{itemize} +\item a web-crawler +\item an email harvester +\item a web-scraper +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Web Crawler\end{tabular}} + +\mbox{}\\[10mm] + +\begin{enumerate} +\item given an URL, read the corresponding webpage +\item extract all links from it +\item call the web-crawler again for all these links +\end{enumerate} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Web Crawler\end{tabular}} + +\mbox{}\\[10mm] + + +\begin{enumerate} +\item given an URL, read the corresponding webpage +\item if not possible print, out a problem +\item if possible, extract all links from it +\item call the web-crawler again for all these links +\end{enumerate}\bigskip\pause + +\small (we need a bound for the number of recursive calls) + +\small (the purpose is to check all links on my own webpage) +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Scala\end{tabular}} + +\footnotesize a simple Scala function for reading webpages\\[-3mm] + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app0.scala}}}\pause +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinline{get_page("""http://www.inf.kcl.ac.uk/staff/urbanc/""")}}}\pause\bigskip + + +\footnotesize slightly more complicated for handling errors properly:\\[-3mm] + +\footnotesize +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app1.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}A Regular Expression\end{tabular}} + +\begin{itemize} +\item \ldots{} is a pattern or template for specifying strings +\end{itemize}\bigskip + +\begin{center} +\only<1>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{"https?://[$\hat{\hspace{2mm}}$"]*"}}}% +\only<2>{{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{"""\textbackslash{}"https?://[$\hat{\hspace{2mm}}$\textbackslash{}"]*\textbackslash{}"""".r}}} +\end{center}\bigskip\bigskip + +matches for example\\ +\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf +\texttt{"http://www.foobar.com"}}\\ +\;{\lstset{language=Scala}\fontsize{12}{14}\selectfont\bf +\texttt{"https://www.tls.org"}}\\ + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{rexp.findAllIn(string)}}\medskip + +returns a list of all (sub)strings that match the regular expression\bigskip\bigskip + +{\lstset{language=Scala}\fontsize{18}{19}\selectfont\bf +\texttt{rexp.findFirstIn(string)}}\medskip + +returns either {\bf\texttt{None}} if no (sub)string matches +or {\bf\texttt{Some(s)}} with the first (sub)string + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app2.scala}}}\medskip + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{crawl(some\_start\_URL, 2)}}\ + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\footnotesize +a version that only ``crawls'' links in my domain: + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app3.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] + +\footnotesize +a little email ``harvester'': + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app4.scala}}}\bigskip + +\tiny +\textcolor{gray}{\url{http://net.tutsplus.com/tutorials/other/8-regular-expressions-you-should-know/}} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\bl}[1]{\textcolor{blue}{#1}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +\begin{textblock}{6}(2,4) + \begin{tabular}{@ {}rrl@ {\hspace{13mm}}l} + \bl{r} & \bl{$::=$} & \bl{$\varnothing$} & null\\ + & \bl{$\mid$} & \bl{$\epsilon$} & empty string / "" / []\\ + & \bl{$\mid$} & \bl{c} & character\\ + & \bl{$\mid$} & \bl{r$_1$ $\cdot$ r$_2$} & sequence\\ + & \bl{$\mid$} & \bl{r$_1$ + r$_2$} & alternative / choice\\ + & \bl{$\mid$} & \bl{r$^*$} & star (zero or more)\\ + \end{tabular} + \end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Regular Expressions\end{tabular}} + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app51.scala}}} + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% for definitions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of a\\[-2mm] Regular Expression\end{tabular}} + +\begin{textblock}{15}(1,4) + \begin{tabular}{@ {}rcl} + \bl{$L$($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$}\\ + \bl{$L$($\epsilon$)} & \bl{$\dn$} & \bl{$\{$""$\}$}\\ + \bl{$L$(c)} & \bl{$\dn$} & \bl{$\{$"c"$\}$}\\ + \bl{$L$(r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{$L$(r$_1$) $\cup$ $L$(r$_2$)}\\ + \bl{$L$(r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r$_1$) $\wedge$ s$_2$ $\in$ + $L$(r$_2$) $\}$}\\ + \bl{$L$(r$^*$)} & \bl{$\dn$} & \onslide<4->{\bl{$\bigcup_{n \ge 0}$ $L$(r)$^n$}}\\ + \end{tabular}\bigskip + +\onslide<2->{ +\hspace{5mm}\bl{$L$(r)$^0$ $\;\dn\;$ $\{$""$\}$}\\ +\bl{$L$(r)$^{n+1}$ $\;\dn\;$ $L$(r) @ $L$(r)$^n$}\hspace{9mm}\onslide<3->{\small\textcolor{gray}{(append on sets)}\\ +\small\hspace{5cm}\textcolor{gray}{$\{$ s$_1$ @ s$_2$ $|$ s$_1$ $\in$ $L$(r) $\wedge$ s$_2$ $\in$ + $L$(r)$^n$ $\}$}} +} + \end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Meaning of a Matching\end{tabular}} + +\large +a regular expression \bl{r} matches a string \bl{s} is defined as + +\begin{center} +\bl{s $\in$ $L$(r)}\\ +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Nullability\end{tabular}} + +\small +whether a regular expression matches the empty string:\medskip + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app5.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Derivative of a Rexp\end{tabular}} + +\large +If \bl{r} matches the string \bl{c::s}, what is a regular expression that matches \bl{s}?\bigskip\bigskip\bigskip\bigskip + +\small +\bl{der c r} gives the answer +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}} + \bl{der c ($\varnothing$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c ($\epsilon$)} & \bl{$\dn$} & \bl{$\varnothing$} & \\ + \bl{der c (d)} & \bl{$\dn$} & \bl{if c $=$ d then [] else $\varnothing$} & \\ + \bl{der c (r$_1$ + r$_2$)} & \bl{$\dn$} & \bl{(der c r$_1$) + (der c r$_2$)} & \\ + \bl{der c (r$_1$ $\cdot$ r$_2$)} & \bl{$\dn$} & \bl{((der c r$_1$) $\cdot$ r$_2$) + } & \\ + & & \bl{\hspace{3mm}(if nullable r$_1$ then der c r$_2$ else $\varnothing$)}\\ + \bl{der c (r$^*$)} & \bl{$\dn$} & \bl{(der c r) $\cdot$ (r$^*$)} &\smallskip\\\pause + + \bl{ders [] r} & \bl{$\dn$} & \bl{r} & \\ + \bl{ders (c::s) r} & \bl{$\dn$} & \bl{ders s (der c r)} & \\ + \end{tabular} +\end{center} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Derivative\end{tabular}} + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app6.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}The Rexp Matcher\end{tabular}} + + +{\lstset{language=Scala}\fontsize{8}{10}\selectfont +\texttt{\lstinputlisting{app7.scala}}} + + + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}This Course\end{tabular}} + +We will have a look at: + +\begin{itemize} +\item regular expressions / regular expression matching +\item automata +\item the Myhill-Nerode theorem +\item parsing +\item grammars +\item a small interpreter / web browser +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[c] +\frametitle{\begin{tabular}{c}Exam\end{tabular}} + +\begin{itemize} +\item The question ``Is this relevant for the exam?'' is not appreciated!\bigskip\\ + +Whatever is in the homework sheets (and is not marked optional) is relevant for the +exam.\\ No code needs to be written. +\end{itemize} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\mode{ +\begin{frame}[t] +\frametitle{\begin{tabular}{c}Maps in Scala\end{tabular}} + +\begin{itemize} +\item {\bf\texttt{map}} takes a function, say f, and applies it to every element of the list: +\end{itemize} + +\begin{textblock}{15}(2,7) +\fontsize{13}{14}\selectfont +\bf\texttt{List(1, 2, 3, 4, 5, 6, 7, 8, 9)} +\end{textblock} + +\begin{textblock}{15}(2,10) +\fontsize{13}{14}\selectfont +\bf\texttt{List(1, 4, 9, 16, 25, 36, 49, 64, 81)} +\end{textblock} + +\end{frame}} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: +