handouts/ho01.tex
changeset 112 95ee5cc5c05d
parent 111 1933e88cb73e
child 113 db6862f6bf6c
equal deleted inserted replaced
111:1933e88cb73e 112:95ee5cc5c05d
     2 \usepackage{charter}
     2 \usepackage{charter}
     3 \usepackage{hyperref}
     3 \usepackage{hyperref}
     4 \usepackage{amssymb}
     4 \usepackage{amssymb}
     5 \usepackage{amsmath}
     5 \usepackage{amsmath}
     6 \usepackage[T1]{fontenc}
     6 \usepackage[T1]{fontenc}
       
     7 \usepackage{listings}
       
     8 \usepackage{xcolor}
     7 
     9 
     8 \newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
    10 \newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
     9 
    11 
       
    12 \definecolor{javared}{rgb}{0.6,0,0} % for strings
       
    13 \definecolor{javagreen}{rgb}{0.25,0.5,0.35} % comments
       
    14 \definecolor{javapurple}{rgb}{0.5,0,0.35} % keywords
       
    15 \definecolor{javadocblue}{rgb}{0.25,0.35,0.75} % javadoc
       
    16 
       
    17 \lstdefinelanguage{scala}{
       
    18   morekeywords={abstract,case,catch,class,def,%
       
    19     do,else,extends,false,final,finally,%
       
    20     for,if,implicit,import,match,mixin,%
       
    21     new,null,object,override,package,%
       
    22     private,protected,requires,return,sealed,%
       
    23     super,this,throw,trait,true,try,%
       
    24     type,val,var,while,with,yield},
       
    25   otherkeywords={=>,<-,<\%,<:,>:,\#,@},
       
    26   sensitive=true,
       
    27   morecomment=[l]{//},
       
    28   morecomment=[n]{/*}{*/},
       
    29   morestring=[b]",
       
    30   morestring=[b]',
       
    31   morestring=[b]"""
       
    32 }
       
    33 
       
    34 \lstset{language=Scala,
       
    35 	basicstyle=\ttfamily,
       
    36 	keywordstyle=\color{javapurple}\bfseries,
       
    37 	stringstyle=\color{javagreen},
       
    38 	commentstyle=\color{javagreen},
       
    39 	morecomment=[s][\color{javadocblue}]{/**}{*/},
       
    40 	numbers=left,
       
    41 	numberstyle=\tiny\color{black},
       
    42 	stepnumber=1,
       
    43 	numbersep=10pt,
       
    44 	tabsize=2,
       
    45 	showspaces=false,
       
    46 	showstringspaces=false}
       
    47 	
    10 \begin{document}
    48 \begin{document}
    11 
    49 
    12 \section*{Handout 1}
    50 \section*{Handout 1}
    13 
    51 
    14 This course is about the processing of strings. Lets start with what we mean by \emph{strings}. Strings
    52 This course is about the processing of strings. Lets start with what we mean by \emph{strings}. Strings
   236 \end{center}
   274 \end{center}
   237 
   275 
   238 \noindent
   276 \noindent
   239 This means we can now precisely state what the meaning, for example, of the regular expression 
   277 This means we can now precisely state what the meaning, for example, of the regular expression 
   240 ${\it h} \cdot {\it e} \cdot {\it l} \cdot {\it l} \cdot {\it o}$ is, namely 
   278 ${\it h} \cdot {\it e} \cdot {\it l} \cdot {\it l} \cdot {\it o}$ is, namely 
   241 $L({\it h} \cdot {\it e} \cdot {\it l} \cdot {\it l} \cdot {\it o}) = \{\text{\it"hello"}\}$. Similarly if we have the choice
   279 $L({\it h} \cdot {\it e} \cdot {\it l} \cdot {\it l} \cdot {\it o}) = \{\text{\it"hello"}\}$...as expected. Similarly if we have the 
   242 $a + b$, the meaning is $L(a + b) = \{\text{\it"a"}, \text{\it"b"}\}$, namely the only two strings which can possibly
   280 choice-regular-expression $a + b$, its meaning is $L(a + b) = \{\text{\it"a"}, \text{\it"b"}\}$, namely the only two strings which can possibly
   243 be matched by this choice. 
   281 be matched by this choice. You can now also conclude why we do not make a difference
   244 
   282 between the different regular expressions $(r_1 + r_2) + r_3$ and $r_1 + (r_2 + r_3)$....they 
   245 The point of this definition is that we can now precisely specify when a string is matched by a
   283 are not the same regular expression, but have the same meaning. 
   246 regular expression, namely a string, say $s$, is matched by a regular expression, say $r$, if
   284 
   247 and only if $s \in L(r)$. In fact we will write a program {\it match} that takes any string $s$ and
   285 The point of the definition of $L$ is that we can now precisely specify when a string $s$ is matched by a
       
   286 regular expression $r$, namely only when $s \in L(r)$. In fact we will write a program {\it match} that takes any string $s$ and
   248 any regular expression $r$ as argument and returns \emph{yes}, if $s \in L(r)$ and \emph{no},
   287 any regular expression $r$ as argument and returns \emph{yes}, if $s \in L(r)$ and \emph{no},
   249 if $s \not\in L(r)$. We leave this for the next lecture.
   288 if $s \not\in L(r)$. We leave this for the next lecture.
       
   289 
       
   290 \begin{figure}[p]
       
   291 {\lstset{language=Scala}\texttt{\lstinputlisting{../progs/crawler1.scala}}}
       
   292 \caption{Scala code for a web-crawler that can detect broken links in a web-page. It uses
       
   293 the regular expression {\tt http\_pattern} in Line~15 for recognising URL-addresses. It finds
       
   294 all links using the library function {\tt findAllIn} in Line~21.}
       
   295 \end{figure}
       
   296 
       
   297 \begin{figure}[p]
       
   298 {\lstset{language=Scala}\texttt{\lstinputlisting{../progs/crawler2.scala}}}
       
   299 \caption{A version of the web-crawler which only follows links in ``my'' domain---since these are the
       
   300 ones I am interested in to fix. It uses the regular expression {\tt my\_urls} in Line~16.
       
   301 The main change is in Line~26 where we test whether URL is in our domain or not.}
       
   302 
       
   303 \end{figure}
       
   304 
       
   305 \begin{figure}[p]
       
   306 {\lstset{language=Scala}\texttt{\lstinputlisting{../progs/crawler3.scala}}}
       
   307 \caption{A small email harvester---whenever we download a web-page, we also check whether
       
   308 it contains any email addresses. For this we use the regular expression {\tt email\_pattern} in
       
   309 Line~17.  The main change is in Lines 33 and 34 where we print all email addresses
       
   310 we can find in a page.}
       
   311 \end{figure}
       
   312 
   250 \end{document}
   313 \end{document}
   251 
   314 
   252 %%% Local Variables: 
   315 %%% Local Variables: 
   253 %%% mode: latex
   316 %%% mode: latex
   254 %%% TeX-master: t
   317 %%% TeX-master: t