coursework/cw01.tex
changeset 127 41ef073ac6c4
child 128 44863a6b468a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coursework/cw01.tex	Mon Oct 07 04:04:06 2013 +0100
@@ -0,0 +1,138 @@
+\documentclass{article}
+\usepackage{charter}
+\usepackage{hyperref}
+\usepackage{amssymb}
+\usepackage{amsmath}
+
+\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
+\begin{document}
+
+\section*{Coursework 1}
+
+This coursework is worth 3\% and is due on 12 November at 16:00. You are asked to implement 
+a regular expression matcher and submit a document containing the answers for the questions 
+below. You can do the implementation in any programming language you like, but you need 
+to submit the source code with which you answered the questions. However, the coursework 
+will \emph{only} be judged according to the answers only. \bigskip
+
+\noindent
+The task is to implement a regular expression matcher based on derivatives. The implementation 
+should be able to deal with the usual regular expressions
+
+\[
+\varnothing, \epsilon, c, r_1 + r_2, r_1 \cdot r_2, r^*
+\]
+
+\noindent
+but also with
+
+\begin{center}
+\begin{tabular}{ll}
+$[c_1 c_2 \ldots c_n]$ & a range of characters\\
+$r^+$ & one or more times $r$\\
+$r^?$ & optional $r$\\
+$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
+$\sim{}r$ & not-regular expression of $r$\\
+\end{tabular}
+\end{center}
+
+\noindent
+In the case of $r^{\{n,m\}}$ we have the convention that $0 \le n \le m$.
+The meaning of these regular expressions is
+
+\begin{center}
+\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
+$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{"c_1", "c_2", \ldots, "c_n"\}$\\ 
+$L(r^+)$            & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
+$L(r^?)$            & $\dn$ & $L(r) \cup \{""\}$\\
+$L(r^{\{n,m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
+$L(\sim{}r)$       & $\dn$ & $UNIV - L(r)$
+\end{tabular}
+\end{center}
+
+\noindent
+whereby in the last clause the set $UNIV$ stands for the set of \emph{all} strings.
+So $\sim{}r$ means `all the strings that $r$ cannot match'. We assume ranges 
+like $[a\mbox{-}z0\mbox{-}9]$ are a shorthand for the regular expression
+
+\[
+[a b c d\ldots z 0 1\ldots 9]\;.
+\]
+
+\noindent 
+Be careful that your implementations for $nullable$ and $der$ satisfies for every $r$ the following two
+properties:
+
+\begin{itemize}
+\item $nullable(r)$ if and only if $""\in L(r)$
+\item $L(der\,c\,r)) = Der\,c\,(L(r))$
+\end{itemize}
+
+\subsection*{Question 1 (unmarked)}
+
+What is your King's email address (you will need it in the questions below)?\bigskip 
+
+\subsection*{Question 2 (marked with 1\%)}
+
+Implement the following regular expression for email addresses
+
+\[
+([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
+\]
+
+\noindent
+and calculate the derivative according to your email address. When calculating
+the derivative, simplify all regular expressions as much as possible, but at least apply the following 
+six rules:
+
+\begin{center}
+\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
+$r \cdot \varnothing$ & $\mapsto$ & $\varnothing$\\ 
+$\varnothing \cdot r$ & $\mapsto$ & $\varnothing$\\ 
+$r \cdot \epsilon$ & $\mapsto$ & $r$\\ 
+$\epsilon \cdot r$ & $\mapsto$ & $r$\\ 
+$r + \varnothing$ & $\mapsto$ & $r$\\ 
+$\varnothing + r$ & $\mapsto$ & $r$\\ 
+\end{tabular}
+\end{center}
+
+\noindent
+Write down your simplified derivative.
+
+\subsection*{Question 3 (marked with 1\%)}
+
+Consider the regular expression $/ \cdot * \cdot (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * \cdot /$ and decide
+wether the following four strings are matched by this regular expression. Answer yes or no.
+
+\begin{enumerate}
+\item "/**/"
+\item "/*foobar*/"
+\item "/*test*/test*/"
+\item "/*test/*test*/"
+\end{enumerate}
+
+\subsection*{Question 4 (marked with 1\%)}
+
+Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be $(a^{\{19,19\}}) \cdot (a^?)$.
+Decide whether the following three strings can be matched by $(r_1^+)^+$. Similarly test them with $(r_2^+)^+$. 
+Again answer in all six cases with yes or no.
+
+\begin{enumerate}
+\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
+\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
+\item$"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
+\end{enumerate}
+
+
+\end{document}
+
+%%% Local Variables: 
+%%% mode: latex
+%%% TeX-master: t
+%%% End: