coursework/cw01.tex
changeset 492 39b7ff2cf1bc
parent 473 dc528091eb70
child 494 d0fc671bcbbf
--- a/coursework/cw01.tex	Wed May 10 17:03:21 2017 +0100
+++ b/coursework/cw01.tex	Sun May 21 00:43:02 2017 +0100
@@ -2,7 +2,11 @@
 \usepackage{../style}
 \usepackage{../langs}
 
+\usepackage{array}
+
+
 \begin{document}
+\newcolumntype{C}[1]{>{\centering}m{#1}}
 
 \section*{Coursework 1 (Strand 1)}
 
@@ -25,7 +29,7 @@
 uploaded to KEATS, which you can freely use.\bigskip
 
 
-\subsubsection*{Task}
+\subsection*{Task}
 
 The task is to implement a regular expression matcher based on
 derivatives of regular expressions. The implementation should
@@ -40,25 +44,41 @@
 
 \begin{center}
 \begin{tabular}{ll}
-$[c_1 c_2 \ldots c_n]$ & a range of characters\\
-$r^+$ & one or more times $r$\\
-$r^?$ & optional $r$\\
-$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
-$\sim{}r$ & not-regular expression of $r$\\
+  $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\
+  $r^+$ & one or more times $r$\\
+  $r^?$ & optional $r$\\
+  $r^{\{n\}}$ & exactly $n$-times\\
+  $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\
+  $r^{\{n..\}}$ & at least $n$-times $r$\\
+  $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
+  $\sim{}r$ & not-regular-expression of $r$\\
 \end{tabular}
 \end{center}
 
-\noindent In the case of $r^{\{n,m\}}$ you can assume the
-convention that $0 \le n \le m$. The meanings of the extended
-regular expressions are
+\noindent You can assume that $n$ and $m$ are greater or equal than
+$0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip
+
+\noindent {\bf Important!} Your implementation should have explicit
+cases for the basic regular expressions, but also explicit cases for
+the extended regular expressions. That means do not treat the extended
+regular expressions by just translating them into the basic ones. See
+also Question 2, where you are asked to explicitly give the rules for
+\textit{nullable} and \textit{der} for the extended regular
+expressions.\newpage
+
+\noindent
+The meanings of the extended regular expressions are
 
 \begin{center}
 \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
-$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
-$L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
-$L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
-$L(r^{\{n,m\}})$           & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
-$L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
+  $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
+  $L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\
+  $L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
+  $L(r^{\{n\}})$             & $\dn$ & $L(r)^n$\\
+  $L(r^{\{..m\}})$           & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\
+  $L(r^{\{n..\}})$           & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\
+  $L(r^{\{n..m\}})$          & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\
+  $L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
 \end{tabular}
 \end{center}
 
@@ -66,31 +86,24 @@
 for the set of \emph{all} strings over the alphabet $\Sigma$
 (in the implementation the alphabet can be just what is
 represented by, say, the type \pcode{Char}). So $\sim{}r$
-means `all the strings that $r$ cannot match'. 
+means in effect ``all the strings that $r$ cannot match''.\medskip 
 
+\noindent
 Be careful that your implementation of \textit{nullable} and
-\textit{der} satisfies for every $r$ the following two
-properties (see also Question 2):
+\textit{der} satisfies for every regular expression $r$ the following
+two properties (see also Question 2):
 
 \begin{itemize}
 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
 \item $L(der\,c\,r) = Der\,c\,(L(r))$
 \end{itemize}
 
-\noindent {\bf Important!} Your implementation should have
-explicit cases for the basic regular expressions, but also
-explicit cases for the extended regular expressions. That
-means do not treat the extended regular expressions by just
-translating them into the basic ones. See also Question 2,
-where you are asked to explicitly give the rules for
-\textit{nullable} and \textit{der} for the extended regular
-expressions.
 
 
 \subsection*{Question 1}
 
 What is your King's email address (you will need it in
-Question 3)?
+Question 4)?
 
 \subsection*{Question 2}
 
@@ -102,16 +115,27 @@
 
 \begin{center}
 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
-$\textit{nullable}([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
-$\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
-$\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
-$\textit{nullable}(r^{\{n,m\}})$            & $\dn$ & $?$\\
-$\textit{nullable}(\sim{}r)$               & $\dn$ & $?$\medskip\\
-$der\, c\, ([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
-$der\, c\, (r^+)$                   & $\dn$ & $?$\\
-$der\, c\, (r^?)$                   & $\dn$ & $?$\\
-$der\, c\, (r^{\{n,m\}})$            & $\dn$ & $?$\\
-$der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
+  $\textit{nullable}([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
+  $\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
+  $\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
+  $\textit{nullable}(r^{\{n\}})$              & $\dn$ & $?$\\
+  $\textit{nullable}(r^{\{..m\}})$            & $\dn$ & $?$\\
+  $\textit{nullable}(r^{\{n..\}})$            & $\dn$ & $?$\\
+  $\textit{nullable}(r^{\{n..m\}})$           & $\dn$ & $?$\\
+  $\textit{nullable}(\sim{}r)$              & $\dn$ & $?$
+\end{tabular}
+\end{center}
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+  $der\, c\, ([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
+  $der\, c\, (r^+)$                   & $\dn$ & $?$\\
+  $der\, c\, (r^?)$                   & $\dn$ & $?$\\
+  $der\, c\, (r^{\{n\}})$              & $\dn$ & $?$\\
+  $der\, c\, (r^{\{..m\}})$           & $\dn$ & $?$\\
+  $der\, c\, (r^{\{n..\}})$           & $\dn$ & $?$\\
+  $der\, c\, (r^{\{n..m\}})$           & $\dn$ & $?$\\
+  $der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
 \end{tabular}
 \end{center}
 
@@ -124,14 +148,77 @@
 \end{itemize}
 
 \noindent
-Give the implementation and the text-version of the clauses above.
+Given the definitions of \textit{nullable} and \textit{der}, it is
+easy to implement a regular expression matcher.  Test your regular
+expression matcher with (at least) the examples:
+
+
+\begin{center}
+\def\arraystretch{1.2}  
+\begin{tabular}{r|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}}
+  string & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ &
+     $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$\\\hline
+  $[]$           &&&&&& \\\hline 
+  \texttt{a}     &&&&&& \\\hline 
+  \texttt{aa}    &&&&&& \\\hline 
+  \texttt{aaa}   &&&&&& \\\hline 
+  \texttt{aaaaa} &&&&&& \\\hline 
+  \texttt{aaaaaa}&&&&&& \\
+\end{tabular}
+\end{center}
+
+\noindent
+Does your matcher produce the expected results?
 
 \subsection*{Question 3}
 
-Implement the following regular expression for email addresses
+There are a number of explicit regular expressions that deal with single or several
+characters, for example:
+
+\begin{center}
+\begin{tabular}{ll}
+  $c$ & match a single character\\  
+  $[c_1,c_2,\ldots,c_n]$ & match a set of characters---for character ranges\\
+  $\textit{ALL}$ & match any character
+\end{tabular}
+\end{center}
+
+\noindent
+the latter is useful for matching any string (for example
+$\textit{ALL}^*$). In order to avoid having an explicit constructor
+for each case, we can generalise all such cases and introduce a single
+constructor $\textit{CFUN}(f)$ where $f$ is a function from characters
+to a boolean. Implement
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+  $\textit{nullable}(\textit{CFUN}(f))$  & $\dn$ & $?$\\
+  $\textit{der}\,c\,(\textit{CFUN}(f))$  & $\dn$ & $?$
+\end{tabular}
+\end{center}
+
+\noindent in your matcher and then give definitions for
+
+\begin{center}
+\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
+  $c$  & $\dn$ & $\textit{CFUN}(?)$\\
+  $[c_1,c_2,\ldots,c_n]$  & $\dn$ & $\textit{CFUN}(?)$\\
+  $\textit{ALL}$  & $\dn$ & $\textit{CFUN}(?)$
+\end{tabular}
+\end{center}
+
+
+\subsection*{Question 4}
+
+Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression
+
+\[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\]
+
+\noindent
+Define in your code the following regular expression for email addresses
 
 \[
-([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
+([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
 \]
 
 \noindent and calculate the derivative according to your email
@@ -154,13 +241,12 @@
 \noindent Write down your simplified derivative in a readable
 notation using parentheses where necessary. That means you
 should use the infix notation $+$, $\cdot$, $^*$ and so on,
-instead of code.
+instead of code.\bigskip
  
-\subsection*{Question 4}
-
-Suppose \textit{[a-z]} stands for the range regular expression
-$[a,b,c,\ldots,z]$.  Consider the regular expression $/ \cdot * \cdot
-(\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot *
+\noindent
+Implement the simplification rules in your regular expression matcher.
+Consider the regular expression $/ \cdot * \cdot
+(\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot *
 \cdot /$ and decide wether the following four strings are matched by
 this regular expression. Answer yes or no.
 
@@ -172,24 +258,7 @@
 \end{enumerate}
 
 \noindent
-Also test your regular expression matcher with the regular
-expressions $a^{\{3,5\}}$ and $(a^?)^{\{3,5\}}$. Test whether the
-strings
-
-\begin{enumerate}
-\setcounter{enumi}{4}
-\item \texttt{aa}
-\item \texttt{aaa}
-\item \texttt{aaaaa}
-\item \texttt{aaaaaa}
-\end{enumerate}
-
-\noindent
-are matched or not. Does your matcher produce the expected results?
-
-\subsection*{Question 5}
-
-Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
+Also let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
 $(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
 strings consisting of $a$s only can be matched by $(r_1^+)^+$.
 Similarly test them with $(r_2^+)^+$. Again answer in all six cases
@@ -201,6 +270,7 @@
 to not introducing any other character.
 
 \begin{enumerate}
+\setcounter{enumi}{4}
 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
@@ -213,6 +283,7 @@
 \end{enumerate}
 
 
+
 \end{document}
 
 %%% Local Variables: