# HG changeset patch # User Christian Urban # Date 1381115046 -3600 # Node ID 41ef073ac6c4451d81f51e536ebfb6ed03c29279 # Parent 7c7185cb4f2be77a2d43e73f0bbb5143cb11285a added diff -r 7c7185cb4f2b -r 41ef073ac6c4 coursework/cw01.pdf Binary file coursework/cw01.pdf has changed diff -r 7c7185cb4f2b -r 41ef073ac6c4 coursework/cw01.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/coursework/cw01.tex Mon Oct 07 04:04:06 2013 +0100 @@ -0,0 +1,138 @@ +\documentclass{article} +\usepackage{charter} +\usepackage{hyperref} +\usepackage{amssymb} +\usepackage{amsmath} + +\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}% +\begin{document} + +\section*{Coursework 1} + +This coursework is worth 3\% and is due on 12 November at 16:00. You are asked to implement +a regular expression matcher and submit a document containing the answers for the questions +below. You can do the implementation in any programming language you like, but you need +to submit the source code with which you answered the questions. However, the coursework +will \emph{only} be judged according to the answers only. \bigskip + +\noindent +The task is to implement a regular expression matcher based on derivatives. The implementation +should be able to deal with the usual regular expressions + +\[ +\varnothing, \epsilon, c, r_1 + r_2, r_1 \cdot r_2, r^* +\] + +\noindent +but also with + +\begin{center} +\begin{tabular}{ll} +$[c_1 c_2 \ldots c_n]$ & a range of characters\\ +$r^+$ & one or more times $r$\\ +$r^?$ & optional $r$\\ +$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\ +$\sim{}r$ & not-regular expression of $r$\\ +\end{tabular} +\end{center} + +\noindent +In the case of $r^{\{n,m\}}$ we have the convention that $0 \le n \le m$. +The meaning of these regular expressions is + +\begin{center} +\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l} +$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{"c_1", "c_2", \ldots, "c_n"\}$\\ +$L(r^+)$ & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\ +$L(r^?)$ & $\dn$ & $L(r) \cup \{""\}$\\ +$L(r^{\{n,m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\ +$L(\sim{}r)$ & $\dn$ & $UNIV - L(r)$ +\end{tabular} +\end{center} + +\noindent +whereby in the last clause the set $UNIV$ stands for the set of \emph{all} strings. +So $\sim{}r$ means `all the strings that $r$ cannot match'. We assume ranges +like $[a\mbox{-}z0\mbox{-}9]$ are a shorthand for the regular expression + +\[ +[a b c d\ldots z 0 1\ldots 9]\;. +\] + +\noindent +Be careful that your implementations for $nullable$ and $der$ satisfies for every $r$ the following two +properties: + +\begin{itemize} +\item $nullable(r)$ if and only if $""\in L(r)$ +\item $L(der\,c\,r)) = Der\,c\,(L(r))$ +\end{itemize} + +\subsection*{Question 1 (unmarked)} + +What is your King's email address (you will need it in the questions below)?\bigskip + +\subsection*{Question 2 (marked with 1\%)} + +Implement the following regular expression for email addresses + +\[ +([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) +\] + +\noindent +and calculate the derivative according to your email address. When calculating +the derivative, simplify all regular expressions as much as possible, but at least apply the following +six rules: + +\begin{center} +\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l} +$r \cdot \varnothing$ & $\mapsto$ & $\varnothing$\\ +$\varnothing \cdot r$ & $\mapsto$ & $\varnothing$\\ +$r \cdot \epsilon$ & $\mapsto$ & $r$\\ +$\epsilon \cdot r$ & $\mapsto$ & $r$\\ +$r + \varnothing$ & $\mapsto$ & $r$\\ +$\varnothing + r$ & $\mapsto$ & $r$\\ +\end{tabular} +\end{center} + +\noindent +Write down your simplified derivative. + +\subsection*{Question 3 (marked with 1\%)} + +Consider the regular expression $/ \cdot * \cdot (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * \cdot /$ and decide +wether the following four strings are matched by this regular expression. Answer yes or no. + +\begin{enumerate} +\item "/**/" +\item "/*foobar*/" +\item "/*test*/test*/" +\item "/*test/*test*/" +\end{enumerate} + +\subsection*{Question 4 (marked with 1\%)} + +Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be $(a^{\{19,19\}}) \cdot (a^?)$. +Decide whether the following three strings can be matched by $(r_1^+)^+$. Similarly test them with $(r_2^+)^+$. +Again answer in all six cases with yes or no. + +\begin{enumerate} +\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$ +\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$ +\item$"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$ +\end{enumerate} + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: