coursework/cw01.tex
author Christian Urban <christian dot urban at kcl dot ac dot uk>
Mon, 07 Oct 2013 04:04:06 +0100
changeset 127 41ef073ac6c4
child 128 44863a6b468a
permissions -rw-r--r--
added

\documentclass{article}
\usepackage{charter}
\usepackage{hyperref}
\usepackage{amssymb}
\usepackage{amsmath}

\newcommand{\dn}{\stackrel{\mbox{\scriptsize def}}{=}}%
\begin{document}

\section*{Coursework 1}

This coursework is worth 3\% and is due on 12 November at 16:00. You are asked to implement 
a regular expression matcher and submit a document containing the answers for the questions 
below. You can do the implementation in any programming language you like, but you need 
to submit the source code with which you answered the questions. However, the coursework 
will \emph{only} be judged according to the answers only. \bigskip

\noindent
The task is to implement a regular expression matcher based on derivatives. The implementation 
should be able to deal with the usual regular expressions

\[
\varnothing, \epsilon, c, r_1 + r_2, r_1 \cdot r_2, r^*
\]

\noindent
but also with

\begin{center}
\begin{tabular}{ll}
$[c_1 c_2 \ldots c_n]$ & a range of characters\\
$r^+$ & one or more times $r$\\
$r^?$ & optional $r$\\
$r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
$\sim{}r$ & not-regular expression of $r$\\
\end{tabular}
\end{center}

\noindent
In the case of $r^{\{n,m\}}$ we have the convention that $0 \le n \le m$.
The meaning of these regular expressions is

\begin{center}
\begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
$L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{"c_1", "c_2", \ldots, "c_n"\}$\\ 
$L(r^+)$            & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
$L(r^?)$            & $\dn$ & $L(r) \cup \{""\}$\\
$L(r^{\{n,m\}})$ & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
$L(\sim{}r)$       & $\dn$ & $UNIV - L(r)$
\end{tabular}
\end{center}

\noindent
whereby in the last clause the set $UNIV$ stands for the set of \emph{all} strings.
So $\sim{}r$ means `all the strings that $r$ cannot match'. We assume ranges 
like $[a\mbox{-}z0\mbox{-}9]$ are a shorthand for the regular expression

\[
[a b c d\ldots z 0 1\ldots 9]\;.
\]

\noindent 
Be careful that your implementations for $nullable$ and $der$ satisfies for every $r$ the following two
properties:

\begin{itemize}
\item $nullable(r)$ if and only if $""\in L(r)$
\item $L(der\,c\,r)) = Der\,c\,(L(r))$
\end{itemize}

\subsection*{Question 1 (unmarked)}

What is your King's email address (you will need it in the questions below)?\bigskip 

\subsection*{Question 2 (marked with 1\%)}

Implement the following regular expression for email addresses

\[
([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
\]

\noindent
and calculate the derivative according to your email address. When calculating
the derivative, simplify all regular expressions as much as possible, but at least apply the following 
six rules:

\begin{center}
\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}l}
$r \cdot \varnothing$ & $\mapsto$ & $\varnothing$\\ 
$\varnothing \cdot r$ & $\mapsto$ & $\varnothing$\\ 
$r \cdot \epsilon$ & $\mapsto$ & $r$\\ 
$\epsilon \cdot r$ & $\mapsto$ & $r$\\ 
$r + \varnothing$ & $\mapsto$ & $r$\\ 
$\varnothing + r$ & $\mapsto$ & $r$\\ 
\end{tabular}
\end{center}

\noindent
Write down your simplified derivative.

\subsection*{Question 3 (marked with 1\%)}

Consider the regular expression $/ \cdot * \cdot (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * \cdot /$ and decide
wether the following four strings are matched by this regular expression. Answer yes or no.

\begin{enumerate}
\item "/**/"
\item "/*foobar*/"
\item "/*test*/test*/"
\item "/*test/*test*/"
\end{enumerate}

\subsection*{Question 4 (marked with 1\%)}

Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be $(a^{\{19,19\}}) \cdot (a^?)$.
Decide whether the following three strings can be matched by $(r_1^+)^+$. Similarly test them with $(r_2^+)^+$. 
Again answer in all six cases with yes or no.

\begin{enumerate}
\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
\item $"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
\item$"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"$
\end{enumerate}


\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End: