--- a/graphics.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/graphics.sty Wed Apr 06 11:51:33 2016 +0100
@@ -10,6 +10,7 @@
\usepackage{graphicx}
\usepackage{pgfplots}
+\pgfplotsset{compat=1.11}
\newenvironment{bubble}[1][]{%
\addtolength{\leftmargini}{4mm}%
Binary file handouts/ho01.pdf has changed
--- a/handouts/ho01.tex Tue Mar 22 17:09:24 2016 +0000
+++ b/handouts/ho01.tex Wed Apr 06 11:51:33 2016 +0100
@@ -10,7 +10,7 @@
%%https://www.reddit.com/r/ProgrammingLanguages/comments/43wlkq/formal_grammar_for_csh_tsch_sh_or_bash/
\begin{document}
-\fnote{\copyright{} Christian Urban, 2014, 2015}
+\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
\section*{Handout 1}
@@ -26,10 +26,10 @@
followed by zero or more letters, numbers and underscores.
Also often we face the problem that we are given a string (for
example some user input) and want to know whether it matches a
-particular pattern. In this way we can, for example, exclude
-user input that would otherwise have nasty effects on our
-program (crashing it or making it go into an infinite loop, if
-not worse).
+particular pattern---be it an email address, for example. In
+this way we can exclude user input that would otherwise have
+nasty effects on our program (crashing it or making it go into
+an infinite loop, if not worse).
\defn{Regular expressions} help with conveniently specifying
such patterns. The idea behind regular expressions is that
@@ -38,8 +38,8 @@
computer science. For example there is no convenient regular
expression for describing the English language short of
enumerating all English words. But they seem useful for
-describing for example email addresses.\footnote{See ``8
-Regular Expressions You Should Know''
+describing for example simple email addresses.\footnote{See
+``8 Regular Expressions You Should Know''
\url{http://goo.gl/5LoVX7}} Consider the following regular
expression
@@ -67,13 +67,19 @@
\noindent
-But for example the following two do not:
+But for example the following two do not
\begin{lstlisting}[language={},numbers=none,keywordstyle=\color{black}]
user@localserver
disposable.style.email.with+symbol@example.com
\end{lstlisting}
+\noindent according to the regular expression we specified in
+\eqref{email}. Whether this is intended or not is a different
+question (the second email above is actually an acceptable
+email address according to the RFC 5322 standard for email
+addresses).
+
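+This can be checked quite easily with Scala's regular
+expression library, using the \texttt{email\_pattern} from
+Figure~\ref{crawler3} and a small helper, called here
+\texttt{matches\_email}, which matches a pattern against a
+complete string (the first address below is just a made-up
+example):
+
+\begin{lstlisting}[numbers=none]
+val email_pattern =
+  """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+
+// anchored matching of a complete string against the pattern
+def matches_email(s: String) : Boolean =
+  email_pattern.pattern.matcher(s).matches
+
+matches_email("jane.doe@example.com")      // true
+matches_email("user@localserver")          // false
+matches_email("disposable.style.email.with+symbol@example.com")  // false
+\end{lstlisting}
+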
As mentioned above, identifiers, or variables, in program code
are often required to satisfy the constraints that they start
with a letter and then can be followed by zero or more letters
@@ -161,7 +167,9 @@
\begin{center}
\begin{tikzpicture}
-\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+\begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
@@ -216,7 +224,9 @@
\begin{center}
\begin{tikzpicture}
- \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+ \begin{axis}[
+ xlabel={strings of \pcode{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,3000,...,12000},
xmax=12500,
@@ -234,7 +244,7 @@
\subsection*{Basic Regular Expressions}
-The regular expressions shown above for Scala, we
+The regular expressions shown earlier for Scala, we
will call \emph{extended regular expressions}. The ones we
will mainly study in this module are \emph{basic regular
expressions}, which by convention we will just call
@@ -246,8 +256,8 @@
\begin{center}
\begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
- $r$ & $::=$ & $\varnothing$ & null\\
- & $\mid$ & $\epsilon$ & empty string / \texttt{""} / []\\
+ $r$ & $::=$ & $\ZERO$ & null language\\
+ & $\mid$ & $\ONE$ & empty string / \texttt{""} / []\\
& $\mid$ & $c$ & single character\\
& $\mid$ & $r_1 + r_2$ & alternative / choice\\
& $\mid$ & $r_1 \cdot r_2$ & sequence\\
@@ -257,37 +267,36 @@
\noindent Because we overload our notation, there are some
subtleties you should be aware of. When regular expressions
-are referred to then $\varnothing$ does not stand for the
-empty set: rather it is a particular pattern that does not
-match any string. Similarly, in the context of regular
-expressions, $\epsilon$ does not stand for the empty string
-(as in many places in the literature) but for a regular
-expression that matches the empty string. The letter $c$
-stands for any character from the alphabet at hand. Again in
-the context of regular expressions, it is a particular pattern
-that can match the specified character. You should also be
-careful with our overloading of the star: assuming you have
-read the handout about our basic mathematical notation, you
-will see that in the context of languages (sets of strings)
-the star stands for an operation on languages. Here $r^*$
-stands for a regular expression, which is different from the
-operation on sets is defined as
+are referred to, then $\ZERO$ (in bold font) does not stand for
+the number zero: rather it is a particular pattern that does
+not match any string. Similarly, in the context of regular
+expressions, $\ONE$ does not stand for the number one but for
+a regular expression that matches the empty string. The letter
+$c$ stands for any character from the alphabet at hand. Again
+in the context of regular expressions, it is a particular
+pattern that can match the specified character. You should
+also be careful with our overloading of the star: assuming you
+have read the handout about our basic mathematical notation,
+you will see that in the context of languages (sets of
+strings) the star stands for an operation on languages. Here
+$r^*$ stands for a regular expression, which is different from
+the operation on languages. The latter is defined as
\[
-A^* \dn \bigcup_{0\le n} A^n
+A\star \dn \bigcup_{0\le n} A^n
\]
\noindent
Note that this expands to
\[
-A^* \dn A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots
+A\star \dn A^0 \cup A^1 \cup A^2 \cup A^3 \cup A^4 \cup \ldots
\]
\noindent which is equivalent to
\[
-A^* \dn \{[]\} \cup A \cup A@A \cup A@A@A \cup A@A@A@A \cup \ldots
+A\star \dn \{[]\} \cup A \cup A@A \cup A@A@A \cup A@A@A@A \cup \ldots
\]
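+
+\noindent For example, if $A$ is the two-element set $\{"a", "b"\}$, then
+\[
+A\star = \{[], "a", "b", "aa", "ab", "ba", "bb", "aaa", \ldots\}
+\]
+\noindent that is, $A\star$ contains every string that can be built from
+the letters $a$ and $b$, including the empty string $[]$.
+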
\noindent
@@ -309,9 +318,12 @@
in case of $+$ and $\cdot$ we actually do not care about the
order and just write $r_1 + r_2 + r_3$, or $r_1 \cdot r_2
\cdot r_3$, respectively. The reasons for this will become
-clear shortly. In the literature you will often find that the
-choice $r_1 + r_2$ is written as $r_1\mid{}r_2$ or
-$r_1\mid\mid{}r_2$. Also following the convention in the
+clear shortly.
+
+In the literature you will often find that the choice $r_1 +
+r_2$ is written as $r_1\mid{}r_2$ or $r_1\mid\mid{}r_2$. Also,
+often our $\ZERO$ and $\ONE$ are written $\varnothing$ and
+$\epsilon$, respectively. Following the convention in the
literature, we will often omit the $\cdot$ altogether. This
is to make some concrete regular expressions more readable.
For example the regular expression for email addresses shown
@@ -342,8 +354,8 @@
\begin{center}
\begin{tabular}{rcl}
-$\varnothing$ & $\mapsto$ & \texttt{NULL}\\
-$\epsilon$ & $\mapsto$ & \texttt{EMPTY}\\
+$\ZERO$ & $\mapsto$ & \texttt{ZERO}\\
+$\ONE$ & $\mapsto$ & \texttt{ONE}\\
$c$ & $\mapsto$ & \texttt{CHAR(c)}\\
$r_1 + r_2$ & $\mapsto$ & \texttt{ALT(r1, r2)}\\
$r_1 \cdot r_2$ & $\mapsto$ & \texttt{SEQ(r1, r2)}\\
@@ -362,7 +374,7 @@
\begin{center}
\begin{tabular}{rcl}
$r+$ & $\mapsto$ & $r\cdot r^*$\\
-$r?$ & $\mapsto$ & $\epsilon + r$\\
+$r?$ & $\mapsto$ & $\ONE + r$\\
$\backslash d$ & $\mapsto$ & $0 + 1 + 2 + \ldots + 9$\\
$[\text{\it a - z}]$ & $\mapsto$ & $a + b + \ldots + z$\\
\end{tabular}
@@ -388,13 +400,13 @@
is defined as follows
\begin{center}
-\begin{tabular}{rcl}
-$L(\varnothing)$ & $\dn$ & $\{\}$\\
-$L(\epsilon)$ & $\dn$ & $\{[]\}$\\
-$L(c)$ & $\dn$ & $\{[c]\}$\\
-$L(r_1+ r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$\\
+\begin{tabular}{rcll}
+$L(\ZERO)$ & $\dn$ & $\{\}$\\
+$L(\ONE)$ & $\dn$ & $\{[]\}$\\
+$L(c)$ & $\dn$ & $\{"c"\}$ & or equivalently $\dn \{[c]\}$\\
+$L(r_1+ r_2)$ & $\dn$ & $L(r_1) \cup L(r_2)$\\
$L(r_1 \cdot r_2)$ & $\dn$ & $L(r_1) \,@\, L(r_2)$\\
-$L(r^*)$ & $\dn$ & $(L(r))^*$\\
+$L(r^*)$ & $\dn$ & $(L(r))\star$\\
\end{tabular}
\end{center}
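+
+\noindent For example, unfolding these clauses for the small
+regular expression $(a + b) \cdot c$ gives
+\[
+L((a + b) \cdot c) = (L(a) \cup L(b)) \,@\, L(c) = \{"a", "b"\} \,@\, \{"c"\} = \{"ac", "bc"\}
+\]
+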
@@ -443,9 +455,9 @@
which can match the strings $a$ and $b$. But also the regular
expression $b + a$ would match the same strings. However,
sometimes it is not so obvious whether two regular expressions
-match the same strings: for example do $r^*$ and $\epsilon + r
-\cdot r^*$ match the same strings? What about $\varnothing^*$
-and $\epsilon^*$? This suggests the following relation between
+match the same strings: for example do $r^*$ and $\ONE + r
+\cdot r^*$ match the same strings? What about $\ZERO^*$
+and $\ONE^*$? This suggests the following relation between
\defn{equivalent regular expressions}:
\[
@@ -460,12 +472,13 @@
whether
\[
-\varnothing^* \equiv \epsilon^*
+\ZERO^* \equiv \ONE^*
\]
-\noindent holds? Such equivalences will be important for our
-matching algorithm, because we can use them to simplify
-regular expressions.
+\noindent holds or not? Such equivalences will be important
+for our matching algorithm, because we can use them to
+simplify regular expressions, which in turn speeds up the
+calculations.
\subsection*{My Fascination for Regular Expressions}
@@ -475,15 +488,15 @@
found out about them. I even have the vague recollection that
I did not quite understand them during my study. If I remember
correctly,\footnote{That was really a long time ago.} I got
-utterly confused about $\epsilon$ and the empty
-string.\footnote{Obviously the lecturer must have been bad.}
-Since my study, I have used regular expressions for
-implementing lexers and parsers as I have always been
-interested in all kinds of programming languages and
-compilers, which invariably need regular expression in some
-form or shape.
+utterly confused about $\ONE$ (which my lecturer wrote as
+$\epsilon$) and the empty string.\footnote{Obviously the
+lecturer must have been bad.} Since my studies, I have used
+regular expressions for implementing lexers and parsers, as I
+have always been interested in all kinds of programming
+languages and compilers, which invariably need regular
+expressions in some form or shape.
-To understand my fascination nowadays with regular
+To understand my fascination \emph{nowadays} with regular
expressions, you need to know that my main scientific interest
for the last 14 years has been with theorem provers. I am a
core developer of one of
@@ -533,16 +546,18 @@
What a feeling if you are an outsider to the subject!
To conclude: Despite my early ignorance about regular
-expressions, I find them now quite interesting. They have a
-beautiful mathematical theory behind them. They have practical
-importance (remember the shocking runtime of the regular
-expression matchers in Python and Ruby in some instances).
-People who are not very familiar with the mathematical
-background of regular expressions get them consistently wrong.
-The hope is that we can do better in the future---for example
-by proving that the algorithms actually satisfy their
-specification and that the corresponding implementations do
-not contain any bugs. We are close, but not yet quite there.
+expressions, I find them now very interesting. They have a
+beautiful mathematical theory behind them, which can
+sometimes be quite deep and contain hidden snares. They have
+practical importance (remember the shocking runtime of the
+regular expression matchers in Python and Ruby in some
+instances). People who are not very familiar with the
+mathematical background of regular expressions get them
+consistently wrong. The hope is that we can do better in the
+future---for example by proving that the algorithms actually
+satisfy their specification and that the corresponding
+implementations do not contain any bugs. We are close, but not
+yet quite there.
Notwithstanding my fascination, I am also happy to admit that regular
expressions have their shortcomings. There are some well-known
@@ -571,18 +586,28 @@
would not know---the only thing I can say to this regular
expression is it is a monstrosity. However, this might
actually be an argument against the RFC standard, rather than
-against regular expressions. Still it is good to know that
-some tasks in text processing just cannot be achieved by using
-regular expressions.
+against regular expressions. A similar argument is made in
+
+\begin{center}
+\url{https://elliot.land/validating-an-email-address}
+\end{center}
+
+
+\noindent which explains some of the crazier parts of email
+addresses. Still it is good to know that some tasks in text
+processing just cannot be achieved by using regular
+expressions.
\begin{figure}[p]
\lstinputlisting{../progs/crawler1.scala}
+
\caption{The Scala code for a simple web-crawler that checks
for broken links in a web-page. It uses the regular expression
-\texttt{http\_pattern} in Line~15 for recognising URL-addresses.
-It finds all links using the library function
-\texttt{findAllIn} in Line~21.\label{crawler1}}
+\texttt{http\_pattern} in Line~\ref{httpline} for recognising
+URLs. It finds all links using the library function
+\texttt{findAllIn} in Line~\ref{findallline}.\label{crawler1}}
+
\end{figure}
@@ -592,9 +617,11 @@
\caption{A version of the web-crawler that only follows links
in ``my'' domain---since these are the ones I am interested
in fixing. It uses the regular expression \texttt{my\_urls} in
-Line~16 to check for my name in the links. The main change is
-in Lines~24--28 where there is a test whether URL is in ``my''
-domain or not.\label{crawler2}}
+Line~\ref{myurlline} to check for my name in the links. The
+main change is in
+Lines~\ref{changestartline}--\ref{changeendline} where there
+is a test whether the URL is in ``my'' domain or
+not.\label{crawler2}}
\end{figure}
@@ -604,9 +631,10 @@
\caption{A small email harvester---whenever we download a
web-page, we also check whether it contains any email
addresses. For this we use the regular expression
-\texttt{email\_pattern} in Line~15. The main change is in Line
-30 where all email addresses that can be found in a page are
-printed.\label{crawler3}}
+\texttt{email\_pattern} in Line~\ref{emailline}. The main
+change is in Line~\ref{mainline} where all email addresses
+that can be found in a page are printed.\label{crawler3}}
+
\end{figure}
\begin{figure}[p]
@@ -617,7 +645,7 @@
\end{minipage}
\end{center}
-\caption{Nothing that can be said\ldots\label{monster}}
+\caption{Nothing that can be said about this\ldots\label{monster}}
\end{figure}
Binary file handouts/ho02.pdf has changed
--- a/handouts/ho02.tex Tue Mar 22 17:09:24 2016 +0000
+++ b/handouts/ho02.tex Wed Apr 06 11:51:33 2016 +0100
@@ -4,8 +4,10 @@
\usepackage{../graphics}
\usepackage{../data}
-\pgfplotsset{compat=1.11}
+
\begin{document}
+\fnote{\copyright{} Christian Urban, King's College London, 2014, 2015, 2016}
+
\section*{Handout 2 (Regular Expression Matching)}
@@ -14,16 +16,18 @@
than the matchers from regular expression libraries in Ruby
and Python (the plots on the left). These plots show the
running time for the evil regular expression
-$a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$ \pcode{a}s.
-We will use this regular expression and strings as running
-example. To see the substantial differences in the two plots
-below, note the different scales of the $x$-axes.
+$a^?{}^{\{n\}}\cdot a^{\{n\}}$ and strings composed of $n$
+\pcode{a}s. We will use this regular expression and these
+strings as our running example. To see the substantial differences in the two
+plots below, note the different scales of the $x$-axes.
\begin{center}
\begin{tabular}{@{}cc@{}}
\begin{tikzpicture}
-\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+\begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,5,...,30},
xmax=33,
@@ -44,7 +48,9 @@
\end{tikzpicture}
&
\begin{tikzpicture}
- \begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+ \begin{axis}[
+ xlabel={strings of \texttt{a}s},
+ ylabel={time in secs},
enlargelimits=false,
xtick={0,3000,...,12000},
xmax=12500,
@@ -119,15 +125,15 @@
\noindent I leave it to you to verify these equivalences and
non-equivalences. It is also interesting to look at some
-corner cases involving $\epsilon$ and $\varnothing$:
+corner cases involving $\ONE$ and $\ZERO$:
\begin{center}
\begin{tabular}{rcl}
-$a \cdot \varnothing$ & $\not\equiv$ & $a$\\
-$a + \epsilon$ & $\not\equiv$ & $a$\\
-$\epsilon$ & $\equiv$ & $\varnothing^*$\\
-$\epsilon^*$ & $\equiv$ & $\epsilon$\\
-$\varnothing^*$ & $\not\equiv$ & $\varnothing$\\
+$a \cdot \ZERO$ & $\not\equiv$ & $a$\\
+$a + \ONE$ & $\not\equiv$ & $a$\\
+$\ONE$ & $\equiv$ & $\ZERO^*$\\
+$\ONE^*$ & $\equiv$ & $\ONE$\\
+$\ZERO^*$ & $\not\equiv$ & $\ZERO$\\
\end{tabular}
\end{center}
@@ -140,20 +146,20 @@
\begin{center}
\begin{tabular}{rcl}
-$r + \varnothing$ & $\equiv$ & $r$\\
-$\varnothing + r$ & $\equiv$ & $r$\\
-$r \cdot \epsilon$ & $\equiv$ & $r$\\
-$\epsilon \cdot r$ & $\equiv$ & $r$\\
-$r \cdot \varnothing$ & $\equiv$ & $\varnothing$\\
-$\varnothing \cdot r$ & $\equiv$ & $\varnothing$\\
+$r + \ZERO$ & $\equiv$ & $r$\\
+$\ZERO + r$ & $\equiv$ & $r$\\
+$r \cdot \ONE$ & $\equiv$ & $r$\\
+$\ONE \cdot r$ & $\equiv$ & $r$\\
+$r \cdot \ZERO$ & $\equiv$ & $\ZERO$\\
+$\ZERO \cdot r$ & $\equiv$ & $\ZERO$\\
$r + r$ & $\equiv$ & $r$
\end{tabular}
\end{center}
\noindent which always hold no matter what the regular
expression $r$ looks like. The first two are easy to verify
-since $L(\varnothing)$ is the empty set. The next two are also
-easy to verify since $L(\epsilon) = \{[]\}$ and appending the
+since $L(\ZERO)$ is the empty set. The next two are also
+easy to verify since $L(\ONE) = \{[]\}$ and appending the
empty string to every string of another set leaves the set
unchanged. Be careful to fully comprehend the fifth and sixth
equivalence: if you concatenate two sets of strings and one is
@@ -167,7 +173,7 @@
example the regular expression
\begin{equation}
-(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot (r_4 \cdot \varnothing)
+(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot (r_4 \cdot \ZERO)
\label{big}
\end{equation}
@@ -182,21 +188,21 @@
\begin{center}
\begin{tabular}{ll}
- & $(r_1 + \varnothing) \cdot \epsilon + ((\epsilon + r_2) + r_3) \cdot
-(\underline{r_4 \cdot \varnothing})$\smallskip\\
-$\equiv$ & $(r_1 + \varnothing) \cdot \epsilon + \underline{((\epsilon + r_2) + r_3) \cdot
-\varnothing}$\smallskip\\
-$\equiv$ & $\underline{(r_1 + \varnothing) \cdot \epsilon} + \varnothing$\smallskip\\
-$\equiv$ & $(\underline{r_1 + \varnothing}) + \varnothing$\smallskip\\
-$\equiv$ & $\underline{r_1 + \varnothing}$\smallskip\\
+ & $(r_1 + \ZERO) \cdot \ONE + ((\ONE + r_2) + r_3) \cdot
+(\underline{r_4 \cdot \ZERO})$\smallskip\\
+$\equiv$ & $(r_1 + \ZERO) \cdot \ONE + \underline{((\ONE + r_2) + r_3) \cdot
+\ZERO}$\smallskip\\
+$\equiv$ & $\underline{(r_1 + \ZERO) \cdot \ONE} + \ZERO$\smallskip\\
+$\equiv$ & $(\underline{r_1 + \ZERO}) + \ZERO$\smallskip\\
+$\equiv$ & $\underline{r_1 + \ZERO}$\smallskip\\
$\equiv$ & $r_1$\
\end{tabular}
\end{center}
\noindent In each step, I underlined where a simplification
rule is applied. Our matching algorithm in the next section
-will often generate such ``useless'' $\epsilon$s and
-$\varnothing$s, therefore simplifying them away will make the
+will often generate such ``useless'' $\ONE$s and
+$\ZERO$s; therefore simplifying them away will make the
algorithm quite a bit faster.
\subsection*{The Matching Algorithm}
@@ -209,8 +215,8 @@
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
-$nullable(\varnothing)$ & $\dn$ & $\textit{false}$\\
-$nullable(\epsilon)$ & $\dn$ & $true$\\
+$nullable(\ZERO)$ & $\dn$ & $\textit{false}$\\
+$nullable(\ONE)$ & $\dn$ & $\textit{true}$\\
$nullable(c)$ & $\dn$ & $\textit{false}$\\
$nullable(r_1 + r_2)$ & $\dn$ & $nullable(r_1) \vee nullable(r_2)$\\
$nullable(r_1 \cdot r_2)$ & $\dn$ & $nullable(r_1) \wedge nullable(r_2)$\\
@@ -243,9 +249,9 @@
\begin{center}
\begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
- $der\, c\, (\varnothing)$ & $\dn$ & $\varnothing$\\
- $der\, c\, (\epsilon)$ & $\dn$ & $\varnothing$ \\
- $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\epsilon$ else $\varnothing$\\
+ $der\, c\, (\ZERO)$ & $\dn$ & $\ZERO$\\
+ $der\, c\, (\ONE)$ & $\dn$ & $\ZERO$ \\
+ $der\, c\, (d)$ & $\dn$ & if $c = d$ then $\ONE$ else $\ZERO$\\
$der\, c\, (r_1 + r_2)$ & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
$der\, c\, (r_1 \cdot r_2)$ & $\dn$ & if $nullable (r_1)$\\
& & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\
@@ -258,14 +264,14 @@
follows: recall what $der$ is supposed to calculate. Given
that the ``input'' regular expression can match a string of
the form $c\!::\!s$, we want a regular
-expression for $s$. Since neither $\varnothing$ nor $\epsilon$
+expression for $s$. Since neither $\ZERO$ nor $\ONE$
can match a string of the form $c\!::\!s$, we return
-$\varnothing$. In the third case we have to make a
+$\ZERO$. In the third case we have to make a
case-distinction: In case the regular expression is $c$, then
clearly it can recognise a string of the form $c\!::\!s$, just
that $s$ is the empty string. Therefore we return the
-$\epsilon$-regular expression. In the other case we again
-return $\varnothing$ since no string of the $c\!::\!s$ can be
+$\ONE$-regular expression. In the other case we again
+return $\ZERO$ since no string of the form $c\!::\!s$ can be
matched. Next come the recursive cases, which are a bit more
involved. Fortunately, the $+$-case is still relatively
straightforward: all strings of the form $c\!::\!s$ are either
@@ -292,9 +298,9 @@
the definition of $der$ by considering the following operation
on sets:
-\[
+\begin{equation}\label{Der}
Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
-\]
+\end{equation}
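+
+\noindent For instance, taking $A$ to be, say, the set
+$\{"abc", "ab", "bc"\}$, we have
+$Der\,a\,A = \{"bc", "b"\}$.
+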
\noindent This operation essentially transforms a set of
strings $A$ by filtering out all strings that do not start
@@ -406,7 +412,7 @@
\begin{figure}[p]
\lstinputlisting{../progs/app5.scala}
\caption{Scala implementation of the nullable and
- derivatives functions. These functions are easy to
+ derivative functions. These functions are easy to
implement in functional languages, because pattern
matching and recursion allow us to mimic the mathematical
definitions very closely.\label{scala1}}
@@ -460,7 +466,7 @@
\noindent Our algorithm traverses such regular expressions at
least once every time a derivative is calculated. So having
large regular expressions will cause problems. This problem
-is aggravated by $a^?$ being represented as $a + \epsilon$.
+is aggravated by $a^?$ being represented as $a + \ONE$.
We can however fix this by having an explicit constructor for
$r^{\{n\}}$. In Scala we would introduce a constructor like
@@ -508,14 +514,14 @@
need to traverse the whole regular expression. There seems,
however, to be one more issue for making the algorithm run faster.
The derivative function often produces ``useless''
-$\varnothing$s and $\epsilon$s. To see this, consider $r = ((a
+$\ZERO$s and $\ONE$s. To see this, consider $r = ((a
\cdot b) + b)^*$ and the following three derivatives
\begin{center}
\begin{tabular}{l}
-$der\,a\,r = ((\epsilon \cdot b) + \varnothing) \cdot r$\\
-$der\,b\,r = ((\varnothing \cdot b) + \epsilon)\cdot r$\\
-$der\,c\,r = ((\varnothing \cdot b) + \varnothing)\cdot r$
+$der\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$\\
+$der\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$\\
+$der\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$
\end{tabular}
\end{center}
@@ -528,7 +534,7 @@
\begin{tabular}{l}
$der\,a\,r \equiv b \cdot r$\\
$der\,b\,r \equiv r$\\
-$der\,c\,r \equiv \varnothing$
+$der\,c\,r \equiv \ZERO$
\end{tabular}
\end{center}
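+
+\noindent These simplified derivatives can also be replayed on a
+computer with the functions from Figures~\ref{scala1}
+and~\ref{scala2} (a small sketch, assuming a constructor
+\texttt{STAR} for the star regular expression):
+
+\begin{lstlisting}[numbers=none]
+val r = STAR(ALT(SEQ(CHAR('a'), CHAR('b')), CHAR('b')))
+
+simp(der('a', r))   // expected: SEQ(CHAR('b'), r)
+simp(der('b', r))   // expected: r
+simp(der('c', r))   // expected: ZERO
+\end{lstlisting}
+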
@@ -553,7 +559,7 @@
calls \texttt{der} first, but then simplifies
the resulting derivative regular expressions before
building the next derivative, see
-Line~28.\label{scala2}}
+Line~\ref{simpline}.\label{scala2}}
\end{figure}
\begin{center}
@@ -590,8 +596,8 @@
\begin{center}
\begin{tabular}{r@{\hspace{1mm}}r@{\hspace{1mm}}l@{\hspace{13mm}}l}
- $r$ & $::=$ & $\varnothing$ & null\\
- & $\mid$ & $\epsilon$ & empty string / \texttt{""} / []\\
+ $r$ & $::=$ & $\ZERO$ & null language\\
+ & $\mid$ & $\ONE$ & empty string / \texttt{""} / []\\
& $\mid$ & $c$ & single character\\
& $\mid$ & $r_1 + r_2$ & alternative / choice\\
& $\mid$ & $r_1 \cdot r_2$ & sequence\\
@@ -604,7 +610,7 @@
the recipe:
\begin{itemize}
-\item $P$ has to hold for $\varnothing$, $\epsilon$ and $c$
+\item $P$ has to hold for $\ZERO$, $\ONE$ and $c$
(these are the base cases).
\item $P$ has to hold for $r_1 + r_2$ under the assumption
that $P$ already holds for $r_1$ and $r_2$.
@@ -625,21 +631,21 @@
\noindent
Let us say that this property is $P(r)$, then the first case
-we need to check is whether $P(\varnothing)$ (see recipe
+we need to check is whether $P(\ZERO)$ holds (see recipe
above). So we have to show that
\[
-nullable(\varnothing) \;\;\text{if and only if}\;\;
-[]\in L(\varnothing)
+nullable(\ZERO) \;\;\text{if and only if}\;\;
+[]\in L(\ZERO)
\]
-\noindent whereby $nullable(\varnothing)$ is by definition of
+\noindent whereby $nullable(\ZERO)$ is by definition of
the function $nullable$ always $\textit{false}$. We also have
-that $L(\varnothing)$ is by definition $\{\}$. It is
+that $L(\ZERO)$ is by definition $\{\}$. It is
impossible that the empty string $[]$ is in the empty set.
Therefore also the right-hand side is false. Consequently we
verified this case: both sides are false. We would still need
-to do this for $P(\varepsilon)$ and $P(c)$. I leave this to
+to do this for $P(\ONE)$ and $P(c)$. I leave this to
you to verify.
Next we need to check the inductive cases, for example
@@ -718,9 +724,16 @@
\label{dersprop}
\end{equation}
-\noindent by induction on $s$. In this proof you can assume
-the following property for $der$ and $Der$ has already been
-proved, that is you can assume
+\noindent by induction on $s$. Recall that $Der$ is defined for
+a single character---see \eqref{Der}; $Ders$ is similar, but for strings:
+
+\[
+Ders\,s\,A\;\dn\;\{s'\,|\,s @ s' \in A\}
+\]
+
+\noindent In this proof you can assume that the following property
+for $der$ and $Der$ has already been proved; that is, you can
+assume
\[
L(der\,c\,r) = Der\,c\,(L(r))
--- a/langs.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/langs.sty Wed Apr 06 11:51:33 2016 +0100
@@ -65,3 +65,5 @@
\newcommand{\pcode}[1]{\mbox{\lstset{language={},keywordstyle=\color{black}}\lstinline!#1!}}
\newcommand{\scode}[1]{\mbox{\lstset{language={},basicstyle=\ttfamily\color{codegreen}}\lstinline!#1!}}
\makeatother
+
+\lstset{escapeinside={(*@}{@*)}}
--- a/progs/app5.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app5.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,6 +1,6 @@
def nullable (r: Rexp) : Boolean = r match {
- case NULL => false
- case EMPTY => true
+ case ZERO => false
+ case ONE => true
case CHAR(_) => false
case ALT(r1, r2) => nullable(r1) || nullable(r2)
case SEQ(r1, r2) => nullable(r1) && nullable(r2)
@@ -8,9 +8,9 @@
}
def der (c: Char, r: Rexp) : Rexp = r match {
- case NULL => NULL
- case EMPTY => NULL
- case CHAR(d) => if (c == d) EMPTY else NULL
+ case ZERO => ZERO
+ case ONE => ZERO
+ case CHAR(d) => if (c == d) ONE else ZERO
case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
case SEQ(r1, r2) =>
if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
--- a/progs/app51.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app51.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,7 +1,7 @@
-def OPT(r: Rexp) = ALT(r, EMPTY)
+def OPT(r: Rexp) = ALT(r, ONE)
def NTIMES(r: Rexp, n: Int) : Rexp = n match {
- case 0 => EMPTY
+ case 0 => ONE
case 1 => r
case n => SEQ(r, NTIMES(r, n - 1))
}
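+
+// With these two helpers, the evil regular expression a?{n} . a{n}
+// from the timing experiments can be built as follows (EVIL is my
+// own name for it; it assumes the constructors SEQ and CHAR of the
+// Rexp datatype):
+def EVIL(n: Int) : Rexp = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))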
--- a/progs/app6.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/app6.scala Wed Apr 06 11:51:33 2016 +0100
@@ -1,17 +1,17 @@
def simp(r: Rexp): Rexp = r match {
case ALT(r1, r2) => {
(simp(r1), simp(r2)) match {
- case (NULL, r2s) => r2s
- case (r1s, NULL) => r1s
+ case (ZERO, r2s) => r2s
+ case (r1s, ZERO) => r1s
case (r1s, r2s) => if (r1s == r2s) r1s else ALT(r1s, r2s)
}
}
case SEQ(r1, r2) => {
(simp(r1), simp(r2)) match {
- case (NULL, _) => NULL
- case (_, NULL) => NULL
- case (EMPTY, r2s) => r2s
- case (r1s, EMPTY) => r1s
+ case (ZERO, _) => ZERO
+ case (_, ZERO) => ZERO
+ case (ONE, r2s) => r2s
+ case (r1s, ONE) => r1s
case (r1s, r2s) => SEQ(r1s, r2s)
}
}
@@ -21,6 +21,6 @@
def ders (s: List[Char], r: Rexp) : Rexp = s match {
case Nil => r
- case c::s => ders(s, simp(der(c, r)))
+ case c::s => ders(s, simp(der(c, r))) (*@\label{simpline}@*)
}
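+
+// The overall matcher: r matches s if the derivative of r w.r.t. all
+// characters of s is nullable (the name matches is my choice; nullable
+// and der are the functions from app5.scala).
+def matches(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
+
+// for example: matches(ALT(CHAR('a'), CHAR('b')), "a")   gives true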
--- a/progs/crawler1.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler1.scala Wed Apr 06 11:51:33 2016 +0100
@@ -12,13 +12,13 @@
}
// regex for URLs
-val http_pattern = """"https?://[^"]*"""".r
+val http_pattern = """"https?://[^"]*"""".r (*@\label{httpline}@*)
// drops the first and last character from a string
def unquote(s: String) = s.drop(1).dropRight(1)
def get_all_URLs(page: String) : Set[String] =
- http_pattern.findAllIn(page).map(unquote).toSet
+ http_pattern.findAllIn(page).map(unquote).toSet (*@\label{findallline}@*)
// naive version of crawl - searches until a given depth,
--- a/progs/crawler2.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler2.scala Wed Apr 06 11:51:33 2016 +0100
@@ -13,7 +13,7 @@
// regexes for URLs and "my" domain
val http_pattern = """"https?://[^"]*"""".r
-val my_urls = """urbanc""".r
+val my_urls = """urbanc""".r (*@\label{myurlline}@*)
def unquote(s: String) = s.drop(1).dropRight(1)
@@ -21,11 +21,11 @@
http_pattern.findAllIn(page).map(unquote).toSet
def crawl(url: String, n: Int) : Unit = {
- if (n == 0) ()
+ if (n == 0) () (*@\label{changestartline}@*)
else if (my_urls.findFirstIn(url) == None) {
println(s"Visiting: $n $url")
get_page(url); ()
- }
+ } (*@\label{changeendline}@*)
else {
println(s"Visiting: $n $url")
for (u <- get_all_URLs(get_page(url))) crawl(u, n - 1)
--- a/progs/crawler3.scala Tue Mar 22 17:09:24 2016 +0000
+++ b/progs/crawler3.scala Wed Apr 06 11:51:33 2016 +0100
@@ -12,7 +12,7 @@
// regexes for URLs, for "my" domain and for email addresses
val http_pattern = """"https?://[^"]*"""".r
-val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r
+val email_pattern = """([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})""".r (*@\label{emailline}@*)
def unquote(s: String) = s.drop(1).dropRight(1)
@@ -27,7 +27,7 @@
else {
println(s"Visiting: $n $url")
val page = get_page(url)
- print_str(email_pattern.findAllIn(page).mkString("\n"))
+ print_str(email_pattern.findAllIn(page).mkString("\n")) (*@\label{mainline}@*)
for (u <- get_all_URLs(page).par) crawl(u, n - 1)
}
}
--- a/style.sty Tue Mar 22 17:09:24 2016 +0000
+++ b/style.sty Wed Apr 06 11:51:33 2016 +0100
@@ -1,5 +1,5 @@
\usepackage{xcolor}
-\usepackage{fontspec}
+%%\usepackage{fontspec}
\usepackage[sc]{mathpazo}
\usepackage{fontspec}
\setmainfont[Ligatures=TeX]{Palatino Linotype}