--- a/coursework/cw01.tex Wed May 10 17:03:21 2017 +0100
+++ b/coursework/cw01.tex Sun May 21 00:43:02 2017 +0100
@@ -2,7 +2,11 @@
\usepackage{../style}
\usepackage{../langs}
+\usepackage{array}
+
+
\begin{document}
+\newcolumntype{C}[1]{>{\centering}m{#1}}
\section*{Coursework 1 (Strand 1)}
@@ -25,7 +29,7 @@
uploaded to KEATS, which you can freely use.\bigskip
-\subsubsection*{Task}
+\subsection*{Task}
The task is to implement a regular expression matcher based on
derivatives of regular expressions. The implementation should
@@ -40,25 +44,41 @@
\begin{center}
\begin{tabular}{ll}
-$[c_1 c_2 \ldots c_n]$ & a range of characters\\
-$r^+$ & one or more times $r$\\
-$r^?$ & optional $r$\\
-$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
-$\sim{}r$ & not-regular expression of $r$\\
+ $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\
+ $r^+$ & one or more times $r$\\
+ $r^?$ & optional $r$\\
+ $r^{\{n\}}$ & exactly $n$-times\\
+ $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\
+ $r^{\{n..\}}$ & at least $n$-times $r$\\
+ $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
+ $\sim{}r$ & not-regular-expression of $r$\\
\end{tabular}
\end{center}
-\noindent In the case of $r^{\{n,m\}}$ you can assume the
-convention that $0 \le n \le m$. The meanings of the extended
-regular expressions are
+\noindent You can assume that $n$ and $m$ are greater or equal than
+$0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip
+
+\noindent {\bf Important!} Your implementation should have explicit
+cases for the basic regular expressions, but also explicit cases for
+the extended regular expressions. That means do not treat the extended
+regular expressions by just translating them into the basic ones. See
+also Question 2, where you are asked to explicitly give the rules for
+\textit{nullable} and \textit{der} for the extended regular
+expressions.\newpage
+
+\noindent
+The meanings of the extended regular expressions are
\begin{center}
\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
-$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\
-$L(r^+)$ & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
-$L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\
-$L(r^{\{n,m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
-$L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$
+ $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\
+ $L(r^+)$ & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\
+ $L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\
+ $L(r^{\{n\}})$ & $\dn$ & $L(r)^n$\\
+ $L(r^{\{..m\}})$ & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\
+ $L(r^{\{n..\}})$ & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\
+ $L(r^{\{n..m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\
+ $L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$
\end{tabular}
\end{center}
@@ -66,31 +86,24 @@
for the set of \emph{all} strings over the alphabet $\Sigma$
(in the implementation the alphabet can be just what is
represented by, say, the type \pcode{Char}). So $\sim{}r$
-means `all the strings that $r$ cannot match'.
+means in effect ``all the strings that $r$ cannot match''.\medskip
+\noindent
Be careful that your implementation of \textit{nullable} and
-\textit{der} satisfies for every $r$ the following two
-properties (see also Question 2):
+\textit{der} satisfies for every regular expression $r$ the following
+two properties (see also Question 2):
\begin{itemize}
\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
\item $L(der\,c\,r) = Der\,c\,(L(r))$
\end{itemize}
-\noindent {\bf Important!} Your implementation should have
-explicit cases for the basic regular expressions, but also
-explicit cases for the extended regular expressions. That
-means do not treat the extended regular expressions by just
-translating them into the basic ones. See also Question 2,
-where you are asked to explicitly give the rules for
-\textit{nullable} and \textit{der} for the extended regular
-expressions.
\subsection*{Question 1}
What is your King's email address (you will need it in
-Question 3)?
+Question 4)?
\subsection*{Question 2}
@@ -102,16 +115,27 @@
\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
-$\textit{nullable}([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\
-$\textit{nullable}(r^+)$ & $\dn$ & $?$\\
-$\textit{nullable}(r^?)$ & $\dn$ & $?$\\
-$\textit{nullable}(r^{\{n,m\}})$ & $\dn$ & $?$\\
-$\textit{nullable}(\sim{}r)$ & $\dn$ & $?$\medskip\\
-$der\, c\, ([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\
-$der\, c\, (r^+)$ & $\dn$ & $?$\\
-$der\, c\, (r^?)$ & $\dn$ & $?$\\
-$der\, c\, (r^{\{n,m\}})$ & $\dn$ & $?$\\
-$der\, c\, (\sim{}r)$ & $\dn$ & $?$\\
+ $\textit{nullable}([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^+)$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^?)$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^{\{n\}})$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^{\{..m\}})$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^{\{n..\}})$ & $\dn$ & $?$\\
+ $\textit{nullable}(r^{\{n..m\}})$ & $\dn$ & $?$\\
+ $\textit{nullable}(\sim{}r)$ & $\dn$ & $?$
+\end{tabular}
+\end{center}
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+ $der\, c\, ([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\
+ $der\, c\, (r^+)$ & $\dn$ & $?$\\
+ $der\, c\, (r^?)$ & $\dn$ & $?$\\
+ $der\, c\, (r^{\{n\}})$ & $\dn$ & $?$\\
+ $der\, c\, (r^{\{..m\}})$ & $\dn$ & $?$\\
+ $der\, c\, (r^{\{n..\}})$ & $\dn$ & $?$\\
+ $der\, c\, (r^{\{n..m\}})$ & $\dn$ & $?$\\
+ $der\, c\, (\sim{}r)$ & $\dn$ & $?$\\
\end{tabular}
\end{center}
@@ -124,14 +148,77 @@
\end{itemize}
\noindent
-Give the implementation and the text-version of the clauses above.
+Given the definitions of \textit{nullable} and \textit{der}, it is
+easy to implement a regular expression matcher. Test your regular
+expression matcher with (at least) the examples:
+
+
+\begin{center}
+\def\arraystretch{1.2}
+\begin{tabular}{r|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}}
+ string & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ &
+ $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$\\\hline
+ $[]$ &&&&&& \\\hline
+ \texttt{a} &&&&&& \\\hline
+ \texttt{aa} &&&&&& \\\hline
+ \texttt{aaa} &&&&&& \\\hline
+ \texttt{aaaaa} &&&&&& \\\hline
+ \texttt{aaaaaa}&&&&&& \\
+\end{tabular}
+\end{center}
+
+\noindent
+Does your matcher produce the expected results?
\subsection*{Question 3}
-Implement the following regular expression for email addresses
+There are a number of explicit regular expressions that deal with single or several
+characters, for example:
+
+\begin{center}
+\begin{tabular}{ll}
+ $c$ & match a single character\\
+ $[c_1,c_2,\ldots,c_n]$ & match a set of characters---for character ranges\\
+ $\textit{ALL}$ & match any character
+\end{tabular}
+\end{center}
+
+\noindent
+the latter is useful for matching any string (for example
+$\textit{ALL}^*$). In order to avoid having an explicit constructor
+for each case, we can generalise all such cases and introduce a single
+constructor $\textit{CFUN}(f)$ where $f$ is a function from characters
+to a boolean. Implement
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+ $\textit{nullable}(\textit{CFUN}(f))$ & $\dn$ & $?$\\
+ $\textit{der}\,c\,(\textit{CFUN}(f))$ & $\dn$ & $?$
+\end{tabular}
+\end{center}
+
+\noindent in your matcher and then give definitions for
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+ $c$ & $\dn$ & $\textit{CFUN}(?)$\\
+ $[c_1,c_2,\ldots,c_n]$ & $\dn$ & $\textit{CFUN}(?)$\\
+ $\textit{ALL}$ & $\dn$ & $\textit{CFUN}(?)$
+\end{tabular}
+\end{center}
+
+
+\subsection*{Question 4}
+
+Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression
+
+\[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\]
+
+\noindent
+Define in your code the following regular expression for email addresses
\[
-([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
+([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
\]
\noindent and calculate the derivative according to your email
@@ -154,13 +241,12 @@
\noindent Write down your simplified derivative in a readable
notation using parentheses where necessary. That means you
should use the infix notation $+$, $\cdot$, $^*$ and so on,
-instead of code.
+instead of code.\bigskip
-\subsection*{Question 4}
-
-Suppose \textit{[a-z]} stands for the range regular expression
-$[a,b,c,\ldots,z]$. Consider the regular expression $/ \cdot * \cdot
-(\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot *
+\noindent
+Implement the simplification rules in your regular expression matcher.
+Consider the regular expression $/ \cdot * \cdot
+(\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot *
\cdot /$ and decide wether the following four strings are matched by
this regular expression. Answer yes or no.
@@ -172,24 +258,7 @@
\end{enumerate}
\noindent
-Also test your regular expression matcher with the regular
-expressions $a^{\{3,5\}}$ and $(a^?)^{\{3,5\}}$. Test whether the
-strings
-
-\begin{enumerate}
-\setcounter{enumi}{4}
-\item \texttt{aa}
-\item \texttt{aaa}
-\item \texttt{aaaaa}
-\item \texttt{aaaaaa}
-\end{enumerate}
-
-\noindent
-are matched or not. Does your matcher produce the expected results?
-
-\subsection*{Question 5}
-
-Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
+Also let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
$(a^{\{19,19\}}) \cdot (a^?)$. Decide whether the following three
strings consisting of $a$s only can be matched by $(r_1^+)^+$.
Similarly test them with $(r_2^+)^+$. Again answer in all six cases
@@ -201,6 +270,7 @@
to not introducing any other character.
\begin{enumerate}
+\setcounter{enumi}{4}
\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
@@ -213,6 +283,7 @@
\end{enumerate}
+
\end{document}
%%% Local Variables: