--- a/graphics.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/graphics.sty Wed Apr 06 11:51:33 2016 +0100
@@ -10,6 +10,7 @@
\usepackage{graphicx}
\usepackage{pgfplots}
+\pgfplotsset{compat=1.11}
\newenvironment{bubble}[1][]{%
\addtolength{\leftmargini}{4mm}%
Binary file handouts/ho01.pdf has changed
--- a/handouts/ho01.tex Tue Mar 22 17:09:24 2016 +0000
+++ b/handouts/ho01.tex Wed Apr 06 11:51:33 2016 +0100
@@ -10,7 +10,7 @@
%%https://www.reddit.com/r/ProgrammingLanguages/comments/43wlkq/formal_grammar_for_csh_tsch_sh_or_bash/
\begin{document}
-\fnote{\copyright{} Christian Urban, 2014, 2015}
+\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
\section*{Handout 1}
@@ -26,10 +26,10 @@
followed by zero or more letters, numbers and underscores.
Also often we face the problem that we are given a string (for
example some user input) and want to know whether it matches a
-particular pattern. In this way we can, for example, exclude
-user input that would otherwise have nasty effects on our
-program (crashing it or making it go into an infinite loop, if
-not worse).
+particular pattern---be it an email address, for example. In
+this way we can exclude user input that would otherwise have
+nasty effects on our program (crashing it or making it go into
+an infinite loop, if not worse).
\defn{Regular expressions} help with conveniently specifying
such patterns. The idea behind regular expressions is that
@@ -38,8 +38,8 @@
computer science. For example there is no convenient regular
expression for describing the English language short of
enumerating all English words. But they seem useful for
-describing for example email addresses.\footnote{See ``8
-Regular Expressions You Should Know''
+describing for example simple email addresses.\footnote{See
+``8 Regular Expressions You Should Know''
\url{http://goo.gl/5LoVX7}} Consider the following regular
expression
@@ -67,13 +67,19 @@
\noindent
-But for example the following two do not:
+But for example the following two do not
\begin{lstlisting}[language={},numbers=none,keywordstyle=\color{black}]
user@localserver
disposable.style.email.with+symbol@example.com
\end{lstlisting}
+\noindent according to the regular expression we specified in
+\eqref{email}. Whether this is intended or not is a different
+question (the second email above is actually an acceptable
+email address according to the RFC 5322 standard for email
+addresses).
+
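+This can be checked quite easily with Scala's regular
+expression library, using the \texttt{email\_pattern} from
+Figure~\ref{crawler3} and a small helper, called here
+\texttt{matches\_email}, which matches a pattern against a
+complete string (the first address below is just a made-up
+example):
+
+\begin{lstlisting}[numbers=none]
+val email_pattern =
+  """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+// anchored matching of a complete string against the pattern
+def matches_email(s: String) : Boolean =
+  email_pattern.pattern.matcher(s).matches
+
+matches_email("jane.doe@example.com")      // true
+matches_email("user@localserver")          // false
+matches_email("disposable.style.email.with+symbol@example.com")  // false
+\end{lstlisting}
+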
As mentioned above, identifiers, or variables, in program code
are often required to satisfy the constraints that they start
with a letter and then can be followed by zero or more letters
@@ -161,7 +167,9 @@
\begin{center}
\begin{tikzpicture}
-\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+\begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
@@ -216,7 +224,9 @@
\begin{center}
\begin{tikzpicture}
- \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+ \begin{axis}[
+ xlabel={strings of \pcode{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,3000,...,12000},
xmax=12500,
@@ -234,7 +244,7 @@
\subsection*{Basic Regular Expressions}
-The regular expressions shown above for Scala, we
+The regular expressions shown earlier for Scala, we
will call \emph{extended regular expressions}. The ones we
will mainly study in this module are \emph{basic regular
expressions}, which by convention we will just call
@@ -246,8 +256,8 @@
\begin{center}
\begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
- $r$ & $::=$ & $\varnothing$ & null\\
- & $\mid$ & $\epsilon$ & empty string / \texttt{""} / []\\
+ $r$ & $::=$ & $\ZERO$ & null language\\
+ & $\mid$ & $\ONE$ & empty string / \texttt{""} / []\\
& $\mid$ & $c$ & single character\\
& $\mid$ & $r_1 + r_2$ & alternative / choice\\
& $\mid$ & $r_1 \cdot r_2$ & sequence\\
@@ -257,37 +267,36 @@
\noindent Because we overload our notation, there are some
subtleties you should be aware of. When regular expressions
-are referred to then $\varnothing$ does not stand for the
-empty set: rather it is a particular pattern that does not
-match any string. Similarly, in the context of regular
-expressions, $\epsilon$ does not stand for the empty string
-(as in many places in the literature) but for a regular
-expression that matches the empty string. The letter $c$
-stands for any character from the alphabet at hand. Again in
-the context of regular expressions, it is a particular pattern
-that can match the specified character. You should also be
-careful with our overloading of the star: assuming you have
-read the handout about our basic mathematical notation, you
-will see that in the context of languages (sets of strings)
-the star stands for an operation on languages. Here $r^*$
-stands for a regular expression, which is different from the
-operation on sets is defined as
+are referred to, then $\ZERO$ (in bold font) does not stand for
+the number zero: rather it is a particular pattern that does
+not match any string. Similarly, in the context of regular
+expressions, $\ONE$ does not stand for the number one but for
+a regular expression that matches the empty string. The letter
+$c$ stands for any character from the alphabet at hand. Again
+in the context of regular expressions, it is a particular
+pattern that can match the specified character. You should
+also be careful with our overloading of the star: assuming you
+have read the handout about our basic mathematical notation,
+you will see that in the context of languages (sets of
+strings) the star stands for an operation on languages. Here
+$r^*$ stands for a regular expression, which is different from
+the operation on languages. The latter is defined as
\[
-A^* \dn \bigcup_{0\le n} A^n
+A\star \dn \bigcup_{0\le n} A^n
\]
\noindent
Note that this expands to
\[
-A^* \dn A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots
+A\star \dn A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots
\]
\noindent which is equivalent to
\[
-A^* \dn \{[]\} \cup A \cup A@A \cup A@A@A \cup A@A@A@A \cup \ldots
+A\star \dn \{[]\} \cup A \cup A@A \cup A@A@A \cup A@A@A@A \cup \ldots
\]
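+
+\noindent For example, if $A$ is the two-element set $\{"a", "b"\}$, then
+\[
+A\star = \{[], "a", "b", "aa", "ab", "ba", "bb", "aaa", \ldots\}
+\]
+\noindent that is, $A\star$ contains every string that can be built from
+the letters $a$ and $b$, including the empty string $[]$.
+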
\noindent
@@ -309,9 +318,12 @@
in case of $+$ and $\cdot$ we actually do not care about the
order and just write $r_1 + r_2 + r_3$, or $r_1 \cdot r_2
\cdot r_3$, respectively. The reasons for this will become
-clear shortly. In the literature you will often find that the
-choice $r_1 + r_2$ is written as $r_1\mid{}r_2$ or
-$r_1\mid\mid{}r_2$. Also following the convention in the
+clear shortly.
+
+In the literature you will often find that the choice $r_1 +
+r_2$ is written as $r_1\mid{}r_2$ or $r_1\mid\mid{}r_2$. Also,
+often our $\ZERO$ and $\ONE$ are written $\varnothing$ and
+$\epsilon$, respectively. Following the convention in the
literature, we will often omit the $\cdot$ altogether. This
is to make some concrete regular expressions more readable.
For example the regular expression for email addresses shown
@@ -342,8 +354,8 @@
\begin{center}
\begin{tabular}{rcl}
-$\varnothing$ & $\mapsto$ & \texttt{NULL}\\
-$\epsilon$ & $\mapsto$ & \texttt{EMPTY}\\
+$\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
+$\ONE$ & $\mapsto$ & \texttt{ONE}\\
$c$ & $\mapsto$ & \texttt{CHAR(c)}\\
$r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)}\\
$r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)}\\
@@ -362,7 +374,7 @@
\begin{center}
\begin{tabular}{rcl}
$r+$ & $\mapsto$ & $r\cdot r^*$\\
-$r?$ & $\mapsto$ & $\epsilon + r$\\
+$r?$ & $\mapsto$ & $\ONE + r$\\
$\backslash d$ & $\mapsto$ & $0 + 1 + 2 + \ldots + 9$\\
$[\text{\it a - z}]$ & $\mapsto$ & $a + b + \ldots + z$\\
\end{tabular}
@@ -388,13 +400,13 @@
is defined as follows
\begin{center}
-\begin{tabular}{rcl}
-$L(\varnothing)$ & $\dn$ & $\{\}$\\
-$L(\epsilon)$ & $\dn$ & $\{[]\}$\\
-$L(c)$ & $\dn$ & $\{[c]\}$\\
-$L(r_1+ r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$\\
+\begin{tabular}{rcll}
+$L(\ZERO)$ & $\dn$ & $\{\}$\\
+$L(\ONE)$ & $\dn$ & $\{[]\}$\\
+$L(c)$ & $\dn$ & $\{"c"\}$ & or equivalently $\dn \{[c]\}$\\
+$L(r_1+ r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$\\
$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$\\
-$L(r^*)$ & $\dn$ & $(L(r))^*$\\
+$L(r^*)$ & $\dn$ & $(L(r))\star$\\
\end{tabular}
\end{center}
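+
+\noindent For example, unfolding these clauses for the small
+regular expression $(a + b) \cdot c$ gives
+\[
+L((a + b) \cdot c) = (L(a) \cup L(b)) \,@\, L(c) = \{"a", "b"\} \,@\, \{"c"\} = \{"ac", "bc"\}
+\]
+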
@@ -443,9 +455,9 @@
which can match the strings $a$ and $b$. But also the regular
expression $b + a$ would match the same strings. However,
sometimes it is not so obvious whether two regular expressions
-match the same strings: for example do $r^*$ and $\epsilon + r
-\cdot r^*$ match the same strings? What about $\varnothing^*$
-and $\epsilon^*$? This suggests the following relation between
+match the same strings: for example do $r^*$ and $\ONE + r
+\cdot r^*$ match the same strings? What about $\ZERO^*$
+and $\ONE^*$? This suggests the following relation between
\defn{equivalent regular expressions}:
\[
@@ -460,12 +472,13 @@
whether
\[
-\varnothing^* \equiv \epsilon^*
+\ZERO^* \equiv \ONE^*
\]
-\noindent holds? Such equivalences will be important for our
-matching algorithm, because we can use them to simplify
-regular expressions.
+\noindent holds or not? Such equivalences will be important
+for our matching algorithm, because we can use them to
+simplify regular expressions, which in turn speeds up the
+calculations.
\subsection*{My Fascination for Regular Expressions}
@@ -475,15 +488,15 @@
found out about them. I even have the vague recollection that
I did not quite understand them during my study. If I remember
correctly,\footnote{That was really a long time ago.} I got
-utterly confused about $\epsilon$ and the empty
-string.\footnote{Obviously the lecturer must have been bad.}
-Since my study, I have used regular expressions for
-implementing lexers and parsers as I have always been
-interested in all kinds of programming languages and
-compilers, which invariably need regular expression in some
-form or shape.
+utterly confused about $\ONE$ (which my lecturer wrote as
+$\epsilon$) and the empty string.\footnote{Obviously the
+lecturer must have been bad.} Since my studies, I have used
+regular expressions for implementing lexers and parsers, as I
+have always been interested in all kinds of programming
+languages and compilers, which invariably need regular
+expressions in some form or shape.
-To understand my fascination nowadays with regular
+To understand my fascination \emph{nowadays} with regular
expressions, you need to know that my main scientific interest
for the last 14 years has been with theorem provers. I am a
core developer of one of
@@ -533,16 +546,18 @@
What a feeling if you are an outsider to the subject!
To conclude: Despite my early ignorance about regular
-expressions, I find them now quite interesting. They have a
-beautiful mathematical theory behind them. They have practical
-importance (remember the shocking runtime of the regular
-expression matchers in Python and Ruby in some instances).
-People who are not very familiar with the mathematical
-background of regular expressions get them consistently wrong.
-The hope is that we can do better in the future---for example
-by proving that the algorithms actually satisfy their
-specification and that the corresponding implementations do
-not contain any bugs. We are close, but not yet quite there.
+expressions, I find them now very interesting. They have a
+beautiful mathematical theory behind them, which can
+sometimes be quite deep and contain hidden snares. They have
+practical importance (remember the shocking runtime of the
+regular expression matchers in Python and Ruby in some
+instances). People who are not very familiar with the
+mathematical background of regular expressions get them
+consistently wrong. The hope is that we can do better in the
+future---for example by proving that the algorithms actually
+satisfy their specification and that the corresponding
+implementations do not contain any bugs. We are close, but not
+yet quite there.
Notwithstanding my fascination, I am also happy to admit that regular
expressions have their shortcomings. There are some well-known
@@ -571,18 +586,28 @@
would not know---the only thing I can say to this regular
expression is it is a monstrosity. However, this might
actually be an argument against the RFC standard, rather than
-against regular expressions. Still it is good to know that
-some tasks in text processing just cannot be achieved by using
-regular expressions.
+against regular expressions. A similar argument is made in
+
+\begin{center}
+\url{https://elliot.land/validating-an-email-address}
+\end{center}
+
+
+\noindent which explains some of the crazier parts of email
+addresses. Still it is good to know that some tasks in text
+processing just cannot be achieved by using regular
+expressions.
\begin{figure}[p]
\lstinputlisting{../progs/crawler1.scala}
+
\caption{The Scala code for a simple web-crawler that checks
for broken links in a web-page. It uses the regular expression
-\texttt{http\_pattern} in Line~15 for recognising URL-addresses.
-It finds all links using the library function
-\texttt{findAllIn} in Line~21.\label{crawler1}}
+\texttt{http\_pattern} in Line~\ref{httpline} for recognising
+URLs. It finds all links using the library function
+\texttt{findAllIn} in Line~\ref{findallline}.\label{crawler1}}
+
\end{figure}
@@ -592,9 +617,11 @@
\caption{A version of the web-crawler that only follows links
in ``my'' domain---since these are the ones I am interested
in fixing. It uses the regular expression \texttt{my\_urls} in
-Line~16 to check for my name in the links. The main change is
-in Lines~24--28 where there is a test whether URL is in ``my''
-domain or not.\label{crawler2}}
+Line~\ref{myurlline} to check for my name in the links. The
+main change is in
+Lines~\ref{changestartline}--\ref{changeendline} where there
+is a test whether the URL is in ``my'' domain or
+not.\label{crawler2}}
\end{figure}
@@ -604,9 +631,10 @@
\caption{A small email harvester---whenever we download a
web-page, we also check whether it contains any email
addresses. For this we use the regular expression
-\texttt{email\_pattern} in Line~15. The main change is in Line
-30 where all email addresses that can be found in a page are
-printed.\label{crawler3}}
+\texttt{email\_pattern} in Line~\ref{emailline}. The main
+change is in Line~\ref{mainline} where all email addresses
+that can be found in a page are printed.\label{crawler3}}
+
\end{figure}
\begin{figure}[p]
@@ -617,7 +645,7 @@
\end{minipage}
\end{center}
-\caption{Nothing that can be said\ldots\label{monster}}
+\caption{Nothing that can be said about this\ldots\label{monster}}
\end{figure}
Binary file handouts/ho02.pdf has changed
--- a/handouts/ho02.tex Tue Mar 22 17:09:24 2016 +0000
+++ b/handouts/ho02.tex Wed Apr 06 11:51:33 2016 +0100
@@ -4,8 +4,10 @@
\usepackage{../graphics}
\usepackage{../data}
-\pgfplotsset{compat=1.11}
+
\begin{document}
+\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
+
\section*{Handout 2 (Regular Expression Matching)}
@@ -14,16 +16,18 @@
than the matchers from regular expression libraries in Ruby
and Python (the plots on the left). These plots show the
running time for the evil regular expression
-$a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$ \pcode{a}s.
-We will use this regular expression and strings as running
-example. To see the substantial differences in the two plots
-below, note the different scales of the $x$-axes.
+$a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$
+\pcode{a}s. We will use this regular expression and these
+strings as our running example. To see the substantial differences in the two
+plots below, note the different scales of the $x$-axes.
\begin{center}
\begin{tabular}{@{}cc@{}}
\begin{tikzpicture}
-\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+\begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
@@ -44,7 +48,9 @@
\end{tikzpicture}
&
\begin{tikzpicture}
- \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+ \begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,3000,...,12000},
xmax=12500,
@@ -119,15 +125,15 @@
\noindent I leave it to you to verify these equivalences and
non-equivalences. It is also interesting to look at some
-corner cases involving $\epsilon$ and $\varnothing$:
+corner cases involving $\ONE$ and $\ZERO$:
\begin{center}
\begin{tabular}{rcl}
-$a \cdot \varnothing$ & $\not\equiv$ & $a$\\
-$a + \epsilon$ & $\not\equiv$ & $a$\\
-$\epsilon$ & $\equiv$ & $\varnothing^*$\\
-$\epsilon^*$ & $\equiv$ & $\epsilon$\\
-$\varnothing^*$ & $\not\equiv$ & $\varnothing$\\
+$a \cdot \ZERO$ & $\not\equiv$ & $a$\\
+$a + \ONE$ & $\not\equiv$ & $a$\\
+$\ONE$ & $\equiv$ & $\ZERO^*$\\
+$\ONE^*$ & $\equiv$ & $\ONE$\\
+$\ZERO^*$ & $\not\equiv$ & $\ZERO$\\
\end{tabular}
\end{center}
@@ -140,20 +146,20 @@
\begin{center}
\begin{tabular}{rcl}
-$r + \varnothing$ & $\equiv$ & $r$\\
-$\varnothing + r$ & $\equiv$ & $r$\\
-$r \cdot \epsilon$ & $\equiv$ & $r$\\
-$\epsilon \cdot r$ & $\equiv$ & $r$\\
-$r \cdot \varnothing$ & $\equiv$ & $\varnothing$\\
-$\varnothing \cdot r$ & $\equiv$ & $\varnothing$\\
+$r + \ZERO$ & $\equiv$ & $r$\\
+$\ZERO + r$ & $\equiv$ & $r$\\
+$r \cdot \ONE$ & $\equiv$ & $r$\\
+$\ONE \cdot r$ & $\equiv$ & $r$\\
+$r \cdot \ZERO$ & $\equiv$ & $\ZERO$\\
+$\ZERO \cdot r$ & $\equiv$ & $\ZERO$\\
$r + r$ & $\equiv$ & $r$
\end{tabular}
\end{center}
\noindent which always hold no matter what the regular
expression $r$ looks like. The first two are easy to verify
-since $L(\varnothing)$ is the empty set. The next two are also
-easy to verify since $L(\epsilon) = \{[]\}$ and appending the
+since $L(\ZERO)$ is the empty set. The next two are also
+easy to verify since $L(\ONE) = \{[]\}$ and appending the
empty string to every string of another set leaves the set
unchanged. Be careful to fully comprehend the fifth and sixth
equivalence: if you concatenate two sets of strings and one is
@@ -167,7 +173,7 @@
example the regular expression
\begin{equation}
-(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot (r_4 \cdot \varnothing)
+(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)
\label{big}
\end{equation}
@@ -182,21 +188,21 @@
\begin{center}
\begin{tabular}{ll}
- & $(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot
-(\underline{r_4 \cdot \varnothing})$\smallskip\\
-$\equiv$ & $(r_1 + \varnothing) \cdot \epsilon + \underline{((\epsilon + r_2) + r_3) \cdot
-\varnothing}$\smallskip\\
-$\equiv$ & $\underline{(r_1 + \varnothing) \cdot \epsilon} + \varnothing$\smallskip\\
-$\equiv$ & $(\underline{r_1 + \varnothing}) + \varnothing$\smallskip\\
-$\equiv$ & $\underline{r_1 + \varnothing}$\smallskip\\
+ & $(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot
+(\underline{r_4 \cdot \ZERO})$\smallskip\\
+$\equiv$ & $(r_1 + \ZERO) \cdot \ONE + \underline{((\ONE + r_2) + r_3) \cdot
+\ZERO}$\smallskip\\
+$\equiv$ & $\underline{(r_1 + \ZERO) \cdot \ONE} + \ZERO$\smallskip\\
+$\equiv$ & $(\underline{r_1 + \ZERO}) + \ZERO$\smallskip\\
+$\equiv$ & $\underline{r_1 + \ZERO}$\smallskip\\
$\equiv$ & $r_1$\
\end{tabular}
\end{center}
\noindent In each step, I underlined where a simplification
rule is applied. Our matching algorithm in the next section
-will often generate such ``useless'' $\epsilon$s and
-$\varnothing$s, therefore simplifying them away will make the
+will often generate such ``useless'' $\ONE$s and
+$\ZERO$s; therefore simplifying them away will make the
algorithm quite a bit faster.
\subsection*{The Matching Algorithm}
@@ -209,8 +215,8 @@
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
-$nullable(\varnothing)$ & $\dn$ & $\textit{false}$\\
-$nullable(\epsilon)$ & $\dn$ & $true$\\
+$nullable(\ZERO)$ & $\dn$ & $\textit{false}$\\
+$nullable(\ONE)$ & $\dn$ & $\textit{true}$\\
$nullable(c)$ & $\dn$ & $\textit{false}$\\
$nullable(r_1 + r_2)$ & $\dn$ & $nullable(r_1) \vee nullable(r_2)$\\
$nullable(r_1 \cdot r_2)$ & $\dn$ & $nullable(r_1) \wedge nullable(r_2)$\\
@@ -243,9 +249,9 @@
\begin{center}
\begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
- $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$\\
- $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\
- $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\
+ $der\, c\, (\ZERO)$ & $\dn$ & $\ZERO$\\
+ $der\, c\, (\ONE)$ & $\dn$ & $\ZERO$ \\
+ $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\ONE$ else $\ZERO$\\
$der\, c\, (r_1 + r_2)$ & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
$der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable (r_1)$\\
& & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\
@@ -258,14 +264,14 @@
follows: recall what $der$ is supposed to calculate. Given
that the ``input'' regular expression can match a string of
the form $c\!::\!s$, we want a regular
-expression for $s$. Since neither $\varnothing$ nor $\epsilon$
+expression for $s$. Since neither $\ZERO$ nor $\ONE$
can match a string of the form $c\!::\!s$, we return
-$\varnothing$. In the third case we have to make a
+$\ZERO$. In the third case we have to make a
case-distinction: In case the regular expression is $c$, then
clearly it can recognise a string of the form $c\!::\!s$, just
that $s$ is the empty string. Therefore we return the
-$\epsilon$-regular expression. In the other case we again
-return $\varnothing$ since no string of the $c\!::\!s$ can be
+$\ONE$-regular expression. In the other case we again
+return $\ZERO$ since no string of the form $c\!::\!s$ can be
matched. Next come the recursive cases, which are a bit more
involved. Fortunately, the $+$-case is still relatively
straightforward: all strings of the form $c\!::\!s$ are either
@@ -292,9 +298,9 @@
the definition of $der$ by considering the following operation
on sets:
-\[
+\begin{equation}\label{Der}
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
-\]
+\end{equation}
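+
+\noindent For instance, taking $A$ to be, say, the set
+$\{"abc", "ab", "bc"\}$, we have
+$Der\,a\,A = \{"bc", "b"\}$.
+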
\noindent This operation essentially transforms a set of
strings $A$ by filtering out all strings that do not start
@@ -406,7 +412,7 @@
\begin{figure}[p]
\lstinputlisting{../progs/app5.scala}
\caption{Scala implementation of the nullable and
- derivatives functions. These functions are easy to
+ derivative functions. These functions are easy to
implement in functional languages, because pattern
matching and recursion allow us to mimic the mathematical
definitions very closely.\label{scala1}}
@@ -460,7 +466,7 @@
\noindent Our algorithm traverses such regular expressions at
least once every time a derivative is calculated. So having
large regular expressions will cause problems. This problem
-is aggravated by $a^?$ being represented as $a + \epsilon$.
+is aggravated by $a^?$ being represented as $a + \ONE$.
We can however fix this by having an explicit constructor for
$r^{\{n\}}$. In Scala we would introduce a constructor like
@@ -508,14 +514,14 @@
need to traverse the whole regular expression. There seems,
however, to be one more issue for making the algorithm run faster.
The derivative function often produces ``useless''
-$\varnothing$s and $\epsilon$s. To see this, consider $r = ((a
+$\ZERO$s and $\ONE$s. To see this, consider $r = ((a
\cdot b) + b)^*$ and the following three derivatives
\begin{center}
\begin{tabular}{l}
-$der\,a\,r = ((\epsilon \cdot b) + \varnothing) \cdot r$\\
-$der\,b\,r = ((\varnothing \cdot b) + \epsilon)\cdot r$\\
-$der\,c\,r = ((\varnothing \cdot b) + \varnothing)\cdot r$
+$der\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$\\
+$der\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$\\
+$der\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$
\end{tabular}
\end{center}
@@ -528,7 +534,7 @@
\begin{tabular}{l}
$der\,a\,r \equiv b \cdot r$\\
$der\,b\,r \equiv r$\\
-$der\,c\,r \equiv \varnothing$
+$der\,c\,r \equiv \ZERO$
\end{tabular}
\end{center}
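+
+\noindent These simplified derivatives can also be replayed on a
+computer with the functions from Figures~\ref{scala1}
+and~\ref{scala2} (a small sketch, assuming a constructor
+\texttt{STAR} for the star regular expression):
+
+\begin{lstlisting}[numbers=none]
+val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
+
+simp(der('a', r))   // expected: SEQ(CHAR('b'), r)
+simp(der('b', r))   // expected: r
+simp(der('c', r))   // expected: ZERO
+\end{lstlisting}
+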
@@ -553,7 +559,7 @@
calls \texttt{der} first, but then simplifies
the resulting derivative regular expressions before
building the next derivative, see
-Line~28.\label{scala2}}
+Line~\ref{simpline}.\label{scala2}}
\end{figure}
\begin{center}
@@ -590,8 +596,8 @@
\begin{center}
\begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
- $r$ & $::=$ & $\varnothing$ & null\\
- & $\mid$ & $\epsilon$ & empty string / \texttt{""} / []\\
+ $r$ & $::=$ & $\ZERO$ & null language\\
+ & $\mid$ & $\ONE$ & empty string / \texttt{""} / []\\
& $\mid$ & $c$ & single character\\
& $\mid$ & $r_1 + r_2$ & alternative / choice\\
& $\mid$ & $r_1 \cdot r_2$ & sequence\\
@@ -604,7 +610,7 @@
the recipe:
\begin{itemize}
-\item $P$ has to hold for $\varnothing$, $\epsilon$ and $c$
+\item $P$ has to hold for $\ZERO$, $\ONE$ and $c$
(these are the base cases).
\item $P$ has to hold for $r_1 + r_2$ under the assumption
that $P$ already holds for $r_1$ and $r_2$.
@@ -625,21 +631,21 @@
\noindent
Let us say that this property is $P(r)$, then the first case
-we need to check is whether $P(\varnothing)$ (see recipe
+we need to check is whether $P(\ZERO)$ holds (see recipe
above). So we have to show that
\[
-nullable(\varnothing) \;\;\text{if and only if}\;\;
-[]\in L(\varnothing)
+nullable(\ZERO) \;\;\text{if and only if}\;\;
+[]\in L(\ZERO)
\]
-\noindent whereby $nullable(\varnothing)$ is by definition of
+\noindent whereby $nullable(\ZERO)$ is by definition of
the function $nullable$ always $\textit{false}$. We also have
-that $L(\varnothing)$ is by definition $\{\}$. It is
+that $L(\ZERO)$ is by definition $\{\}$. It is
impossible that the empty string $[]$ is in the empty set.
Therefore also the right-hand side is false. Consequently we
verified this case: both sides are false. We would still need
-to do this for $P(\varepsilon)$ and $P(c)$. I leave this to
+to do this for $P(\ONE)$ and $P(c)$. I leave this to
you to verify.
Next we need to check the inductive cases, for example
@@ -718,9 +724,16 @@
\label{dersprop}
\end{equation}
-\noindent by induction on $s$. In this proof you can assume
-the following property for $der$ and $Der$ has already been
-proved, that is you can assume
+\noindent by induction on $s$. Recall that $Der$ is defined for
+a single character---see \eqref{Der}; $Ders$ is similar, but for strings:
+
+\[
+Ders\,s\,A\;\dn\;\{s'\,|\,s @ s' \in A\}
+\]
+
+\noindent In this proof you can assume that the following property
+for $der$ and $Der$ has already been proved; that is, you can
+assume
\[
L(der\,c\,r) = Der\,c\,(L(r))
--- a/langs.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/langs.sty Wed Apr 06 11:51:33 2016 +0100
@@ -65,3 +65,5 @@
\newcommand{\pcode}[1]{\mbox{\lstset{language={},keywordstyle=\color{black}}\lstinline!#1!}}
\newcommand{\scode}[1]{\mbox{\lstset{language={},basicstyle=\ttfamily\color{codegreen}}\lstinline!#1!}}
\makeatother
+
+\lstset{escapeinside={(*@}{@*)}}
--- a/progs/app5.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app5.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,6 +1,6 @@
def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
+ case ZERO => false
+ case ONE => true
case CHAR(_) => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
@@ -8,9 +8,9 @@
}
def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
--- a/progs/app51.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app51.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,7 +1,7 @@
-def OPT(r: Rexp) = ALT(r, EMPTY)
+def OPT(r: Rexp) = ALT(r, ONE)
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
- case 0 => EMPTY
+ case 0 => ONE
case 1 => r
case n => SEQ(r, NTIMES(r, n - 1))
}
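+
+// With these two helpers, the evil regular expression a?{n} . a{n}
+// from the timing experiments can be built as follows (EVIL is my
+// own name for it; it assumes the constructors SEQ and CHAR of the
+// Rexp datatype):
+def EVIL(n: Int) : Rexp = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))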
--- a/progs/app6.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app6.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,17 +1,17 @@
def simp(r: Rexp): Rexp = r match {
case ALT(r1, r2) => {
(simp(r1), simp(r2)) match {
- case (NULL, r2s) => r2s
- case (r1s, NULL) => r1s
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
}
}
case SEQ(r1, r2) => {
(simp(r1), simp(r2)) match {
- case (NULL, _) => NULL
- case (_, NULL) => NULL
- case (EMPTY, r2s) => r2s
- case (r1s, EMPTY) => r1s
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
case (r1s, r2s) => SEQ(r1s, r2s)
}
}
@@ -21,6 +21,6 @@
def ders (s: List[Char], r: Rexp) : Rexp = s match {
case Nil => r
- case c::s => ders(s, simp(der(c, r)))
+ case c::s => ders(s, simp(der(c, r))) (*@\label{simpline}@*)
}
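+
+// The overall matcher: r matches s if the derivative of r w.r.t. all
+// characters of s is nullable (the name matches is my choice; nullable
+// and der are the functions from app5.scala).
+def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+// for example: matches(ALT(CHAR('a'), CHAR('b')), "a")   gives true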
--- a/progs/crawler1.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler1.scala Wed Apr 06 11:51:33 2016 +0100
@@ -12,13 +12,13 @@
}
// regex for URLs
-val http_pattern = """"https?://[^"]*"""".r
+val http_pattern = """"https?://[^"]*"""".r (*@\label{httpline}@*)
// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)
def get_all_URLs(page: String) : Set[String] =
- http_pattern.findAllIn(page).map(unquote).toSet
+ http_pattern.findAllIn(page).map(unquote).toSet (*@\label{findallline}@*)
// naive version of crawl - searches until a given depth,
--- a/progs/crawler2.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler2.scala Wed Apr 06 11:51:33 2016 +0100
@@ -13,7 +13,7 @@
// regexes for URLs and "my" domain
val http_pattern = """"https?://[^"]*"""".r
-val my_urls = """urbanc""".r
+val my_urls = """urbanc""".r (*@\label{myurlline}@*)
def unquote(s: String) = s.drop(1).dropRight(1)
@@ -21,11 +21,11 @@
http_pattern.findAllIn(page).map(unquote).toSet
def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
+ if (n == 0) () (*@\label{changestartline}@*)
else if (my_urls.findFirstIn(url) == None) {
println(s"Visiting: $n $url")
get_page(url); ()
- }
+ } (*@\label{changeendline}@*)
else {
println(s"Visiting: $n $url")
for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
--- a/progs/crawler3.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler3.scala Wed Apr 06 11:51:33 2016 +0100
@@ -12,7 +12,7 @@
// regexes for URLs, for "my" domain and for email addresses
val http_pattern = """"https?://[^"]*"""".r
-val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r (*@\label{emailline}@*)
def unquote(s: String) = s.drop(1).dropRight(1)
@@ -27,7 +27,7 @@
else {
println(s"Visiting: $n $url")
val page = get_page(url)
- print_str(email_pattern.findAllIn(page).mkString("\n"))
+ print_str(email_pattern.findAllIn(page).mkString("\n")) (*@\label{mainline}@*)
for (u <- get_all_URLs(page).par) crawl(u, n - 1)
}
}
--- a/style.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/style.sty Wed Apr 06 11:51:33 2016 +0100
@@ -1,5 +1,5 @@
\usepackage{xcolor}
-\usepackage{fontspec}
+%%\usepackage{fontspec}
\usepackage[sc]{mathpazo}
\usepackage{fontspec}
\setmainfont[Ligatures=TeX]{Palatino Linotype}