diff -r f2d7b885b3e3 -r 5dc452d7c08e coursework/cw01.tex --- a/coursework/cw01.tex Tue Sep 26 14:07:29 2017 +0100 +++ b/coursework/cw01.tex Tue Sep 26 14:08:49 2017 +0100 @@ -2,11 +2,15 @@ \usepackage{../style} \usepackage{../langs} +\usepackage{array} + + \begin{document} +\newcolumntype{C}[1]{>{\centering}m{#1}} \section*{Coursework 1 (Strand 1)} -This coursework is worth 4\% and is due on 25 October at +This coursework is worth 4\% and is due on 19 October at 16:00. You are asked to implement a regular expression matcher and submit a document containing the answers for the questions below. You can do the implementation in any programming @@ -25,7 +29,7 @@ uploaded to KEATS, which you can freely use.\bigskip -\subsubsection*{Task} +\subsection*{Task} The task is to implement a regular expression matcher based on derivatives of regular expressions. The implementation should @@ -40,25 +44,41 @@ \begin{center} \begin{tabular}{ll} -$[c_1 c_2 \ldots c_n]$ & a range of characters\\ -$r^+$ & one or more times $r$\\ -$r^?$ & optional $r$\\ -$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ -$\sim{}r$ & not-regular expression of $r$\\ + $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\ + $r^+$ & one or more times $r$\\ + $r^?$ & optional $r$\\ + $r^{\{n\}}$ & exactly $n$-times\\ + $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\ + $r^{\{n..\}}$ & at least $n$-times $r$\\ + $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ + $\sim{}r$ & not-regular-expression of $r$\\ \end{tabular} \end{center} -\noindent In the case of $r^{\{n,m\}}$ you can assume the -convention that $0 \le n \le m$. The meanings of the extended -regular expressions are +\noindent You can assume that $n$ and $m$ are greater or equal than +$0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip + +\noindent {\bf Important!} Your implementation should have explicit +cases for the basic regular expressions, but also for explicit cases for +the extended regular expressions. That means do not treat the extended +regular expressions by just translating them into the basic ones. See +also Question 2, where you are asked to explicitly give the rules for +\textit{nullable} and \textit{der} for the extended regular +expressions.\newpage + +\noindent +The meanings of the extended regular expressions are \begin{center} \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l} -$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ -$L(r^+)$ & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\ -$L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\ -$L(r^{\{n,m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\ -$L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$ + $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ + $L(r^+)$ & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\ + $L(r^?)$ & $\dn$ & $L(r) \cup \{[]\}$\\ + $L(r^{\{n\}})$ & $\dn$ & $L(r)^n$\\ + $L(r^{\{..m\}})$ & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\ + $L(r^{\{n..\}})$ & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\ + $L(r^{\{n..m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\ + $L(\sim{}r)$ & $\dn$ & $\Sigma^* - L(r)$ \end{tabular} \end{center} @@ -66,52 +86,56 @@ for the set of \emph{all} strings over the alphabet $\Sigma$ (in the implementation the alphabet can be just what is represented by, say, the type \pcode{Char}). So $\sim{}r$ -means `all the strings that $r$ cannot match'. +means in effect ``all the strings that $r$ cannot match''.\medskip +\noindent Be careful that your implementation of \textit{nullable} and -\textit{der} satisfies for every $r$ the following two -properties (see also Question 2): +\textit{der} satisfies for every regular expression $r$ the following +two properties (see also Question 2): \begin{itemize} \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ \item $L(der\,c\,r) = Der\,c\,(L(r))$ \end{itemize} -\noindent {\bf Important!} Your implementation should have -explicit cases for the basic regular expressions, but also -explicit cases for the extended regular expressions. That -means do not treat the extended regular expressions by just -translating them into the basic ones. See also Question 2, -where you are asked to explicitly give the rules for -\textit{nullable} and \textit{der} for the extended regular -expressions. \subsection*{Question 1} What is your King's email address (you will need it in -Question 3)? +Question 4)? \subsection*{Question 2} -This question does not require any implementation. From the +From the lectures you have seen the definitions for the functions \textit{nullable} and \textit{der} for the basic regular -expressions. Give the rules for the extended regular +expressions. Implement the rules for the extended regular expressions: \begin{center} \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} -$\textit{nullable}([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\ -$\textit{nullable}(r^+)$ & $\dn$ & $?$\\ -$\textit{nullable}(r^?)$ & $\dn$ & $?$\\ -$\textit{nullable}(r^{\{n,m\}})$ & $\dn$ & $?$\\ -$\textit{nullable}(\sim{}r)$ & $\dn$ & $?$\medskip\\ -$der\, c\, ([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\ -$der\, c\, (r^+)$ & $\dn$ & $?$\\ -$der\, c\, (r^?)$ & $\dn$ & $?$\\ -$der\, c\, (r^{\{n,m\}})$ & $\dn$ & $?$\\ -$der\, c\, (\sim{}r)$ & $\dn$ & $?$\\ + $\textit{nullable}([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ + $\textit{nullable}(r^+)$ & $\dn$ & $?$\\ + $\textit{nullable}(r^?)$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{..m\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n..\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(r^{\{n..m\}})$ & $\dn$ & $?$\\ + $\textit{nullable}(\sim{}r)$ & $\dn$ & $?$ +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $der\, c\, ([c_1,c_2,\ldots,c_n])$ & $\dn$ & $?$\\ + $der\, c\, (r^+)$ & $\dn$ & $?$\\ + $der\, c\, (r^?)$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{..m\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n..\}})$ & $\dn$ & $?$\\ + $der\, c\, (r^{\{n..m\}})$ & $\dn$ & $?$\\ + $der\, c\, (\sim{}r)$ & $\dn$ & $?$\\ \end{tabular} \end{center} @@ -123,12 +147,80 @@ \item $L(der\,c\,r)) = Der\,c\,(L(r))$ \end{itemize} +\noindent +Given the definitions of \textit{nullable} and \textit{der}, it is +easy to implement a regular expression matcher. Test your regular +expression matcher with (at least) the examples: + + +\begin{center} +\def\arraystretch{1.2} +\begin{tabular}{r|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}} + string & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ & + $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$\\\hline + $[]$ &&&&&& \\\hline + \texttt{a} &&&&&& \\\hline + \texttt{aa} &&&&&& \\\hline + \texttt{aaa} &&&&&& \\\hline + \texttt{aaaaa} &&&&&& \\\hline + \texttt{aaaaaa}&&&&&& \\ +\end{tabular} +\end{center} + +\noindent +Does your matcher produce the expected results? + \subsection*{Question 3} -Implement the following regular expression for email addresses +As you can see, there are a number of explicit regular expressions +that deal with single or several characters, for example: + +\begin{center} +\begin{tabular}{ll} + $c$ & matches a single character\\ + $[c_1,c_2,\ldots,c_n]$ & matches a set of characters---for character ranges\\ + $\textit{ALL}$ & matches any character +\end{tabular} +\end{center} + +\noindent +the latter is useful for matching any string (for example +by using $\textit{ALL}^*$). In order to avoid having an explicit constructor +for each case, we can generalise all these cases and introduce a single +constructor $\textit{CFUN}(f)$ where $f$ is a function from characters +to a boolean. The idea is that the function $f$ determines which character(s) +are matched, namely those where $f$ returns \texttt{true}. +In this question implement \textit{CFUN} and define + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $\textit{nullable}(\textit{CFUN}(f))$ & $\dn$ & $?$\\ + $\textit{der}\,c\,(\textit{CFUN}(f))$ & $\dn$ & $?$ +\end{tabular} +\end{center} + +\noindent in your matcher and then also give definitions for + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} + $c$ & $\dn$ & $\textit{CFUN}(?)$\\ + $[c_1,c_2,\ldots,c_n]$ & $\dn$ & $\textit{CFUN}(?)$\\ + $\textit{ALL}$ & $\dn$ & $\textit{CFUN}(?)$ +\end{tabular} +\end{center} + + +\subsection*{Question 4} + +Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression + +\[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\] + +\noindent +Define in your code the following regular expression for email addresses \[ -([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) +([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) \] \noindent and calculate the derivative according to your email @@ -151,13 +243,12 @@ \noindent Write down your simplified derivative in a readable notation using parentheses where necessary. That means you should use the infix notation $+$, $\cdot$, $^*$ and so on, -instead of code. +instead of code.\bigskip -\subsection*{Question 4} - -Suppose \textit{[a-z]} stands for the range regular expression -$[a,b,c,\ldots,z]$. Consider the regular expression $/ \cdot * \cdot -(\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * +\noindent +Implement the simplification rules in your regular expression matcher. +Consider the regular expression $/ \cdot * \cdot +(\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot * \cdot /$ and decide wether the following four strings are matched by this regular expression. Answer yes or no. @@ -169,23 +260,7 @@ \end{enumerate} \noindent -Also test your regular expression matcher with the regular -expression $a^{\{3,5\}}$ and the strings - -\begin{enumerate} -\setcounter{enumi}{4} -\item \texttt{aa} -\item \texttt{aaa} -\item \texttt{aaaaa} -\item \texttt{aaaaaa} -\end{enumerate} - -\noindent -Does your matcher produce the expected results? - -\subsection*{Question 5} - -Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be +Also let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be $(a^{\{19,19\}}) \cdot (a^?)$. Decide whether the following three strings consisting of $a$s only can be matched by $(r_1^+)^+$. Similarly test them with $(r_2^+)^+$. Again answer in all six cases @@ -197,6 +272,7 @@ to not introducing any other character. \begin{enumerate} +\setcounter{enumi}{4} \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} @@ -209,6 +285,7 @@ \end{enumerate} + \end{document} %%% Local Variables: