coursework/cw01.tex
changeset 504 5dc452d7c08e
parent 499 dfd0f41f8668
child 512 a6aa52ecc1c5
equal deleted inserted replaced
503:f2d7b885b3e3 504:5dc452d7c08e
     1 \documentclass{article}
     1 \documentclass{article}
     2 \usepackage{../style}
     2 \usepackage{../style}
     3 \usepackage{../langs}
     3 \usepackage{../langs}
     4 
     4 
       
     5 \usepackage{array}
       
     6 
       
     7 
     5 \begin{document}
     8 \begin{document}
       
     9 \newcolumntype{C}[1]{>{\centering}m{#1}}
     6 
    10 
     7 \section*{Coursework 1 (Strand 1)}
    11 \section*{Coursework 1 (Strand 1)}
     8 
    12 
     9 This coursework is worth 4\% and is due on 25 October at
    13 This coursework is worth 4\% and is due on 19 October at
    10 16:00. You are asked to implement a regular expression matcher
    14 16:00. You are asked to implement a regular expression matcher
    11 and submit a document containing the answers for the questions
    15 and submit a document containing the answers for the questions
    12 below. You can do the implementation in any programming
    16 below. You can do the implementation in any programming
    13 language you like, but you need to submit the source code with
    17 language you like, but you need to submit the source code with
    14 which you answered the questions, otherwise a mark of 0\% will
    18 which you answered the questions, otherwise a mark of 0\% will
    23 your own effort. You have not copied from anyone else. An
    27 your own effort. You have not copied from anyone else. An
    24 exception is the Scala code I showed during the lectures or
    28 exception is the Scala code I showed during the lectures or
    25 uploaded to KEATS, which you can freely use.\bigskip
    29 uploaded to KEATS, which you can freely use.\bigskip
    26 
    30 
    27 
    31 
    28 \subsubsection*{Task}
    32 \subsection*{Task}
    29 
    33 
    30 The task is to implement a regular expression matcher based on
    34 The task is to implement a regular expression matcher based on
    31 derivatives of regular expressions. The implementation should
    35 derivatives of regular expressions. The implementation should
    32 be able to deal with the usual (basic) regular expressions
    36 be able to deal with the usual (basic) regular expressions
    33 
    37 
    38 \noindent
    42 \noindent
    39 but also with the following extended regular expressions:
    43 but also with the following extended regular expressions:
    40 
    44 
    41 \begin{center}
    45 \begin{center}
    42 \begin{tabular}{ll}
    46 \begin{tabular}{ll}
    43 $[c_1 c_2 \ldots c_n]$ & a range of characters\\
    47   $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\
    44 $r^+$ & one or more times $r$\\
    48   $r^+$ & one or more times $r$\\
    45 $r^?$ & optional $r$\\
    49   $r^?$ & optional $r$\\
    46 $r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
    50   $r^{\{n\}}$ & exactly $n$-times\\
    47 $\sim{}r$ & not-regular expression of $r$\\
    51   $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\
    48 \end{tabular}
    52   $r^{\{n..\}}$ & at least $n$-times $r$\\
    49 \end{center}
    53   $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
    50 
    54   $\sim{}r$ & not-regular-expression of $r$\\
    51 \noindent In the case of $r^{\{n,m\}}$ you can assume the
    55 \end{tabular}
    52 convention that $0 \le n \le m$. The meanings of the extended
    56 \end{center}
    53 regular expressions are
    57 
       
    58 \noindent You can assume that $n$ and $m$ are greater or equal than
       
    59 $0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip
       
    60 
       
    61 \noindent {\bf Important!} Your implementation should have explicit
       
    62 cases for the basic regular expressions, but also for explicit cases for
       
    63 the extended regular expressions. That means do not treat the extended
       
    64 regular expressions by just translating them into the basic ones. See
       
    65 also Question 2, where you are asked to explicitly give the rules for
       
    66 \textit{nullable} and \textit{der} for the extended regular
       
    67 expressions.\newpage
       
    68 
       
    69 \noindent
       
    70 The meanings of the extended regular expressions are
    54 
    71 
    55 \begin{center}
    72 \begin{center}
    56 \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
    73 \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
    57 $L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
    74   $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
    58 $L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
    75   $L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\
    59 $L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
    76   $L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
    60 $L(r^{\{n,m\}})$           & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
    77   $L(r^{\{n\}})$             & $\dn$ & $L(r)^n$\\
    61 $L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
    78   $L(r^{\{..m\}})$           & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\
       
    79   $L(r^{\{n..\}})$           & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\
       
    80   $L(r^{\{n..m\}})$          & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\
       
    81   $L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
    62 \end{tabular}
    82 \end{tabular}
    63 \end{center}
    83 \end{center}
    64 
    84 
    65 \noindent whereby in the last clause the set $\Sigma^*$ stands
    85 \noindent whereby in the last clause the set $\Sigma^*$ stands
    66 for the set of \emph{all} strings over the alphabet $\Sigma$
    86 for the set of \emph{all} strings over the alphabet $\Sigma$
    67 (in the implementation the alphabet can be just what is
    87 (in the implementation the alphabet can be just what is
    68 represented by, say, the type \pcode{Char}). So $\sim{}r$
    88 represented by, say, the type \pcode{Char}). So $\sim{}r$
    69 means `all the strings that $r$ cannot match'. 
    89 means in effect ``all the strings that $r$ cannot match''.\medskip 
    70 
    90 
       
    91 \noindent
    71 Be careful that your implementation of \textit{nullable} and
    92 Be careful that your implementation of \textit{nullable} and
    72 \textit{der} satisfies for every $r$ the following two
    93 \textit{der} satisfies for every regular expression $r$ the following
    73 properties (see also Question 2):
    94 two properties (see also Question 2):
    74 
    95 
    75 \begin{itemize}
    96 \begin{itemize}
    76 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
    97 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
    77 \item $L(der\,c\,r) = Der\,c\,(L(r))$
    98 \item $L(der\,c\,r) = Der\,c\,(L(r))$
    78 \end{itemize}
    99 \end{itemize}
    79 
   100 
    80 \noindent {\bf Important!} Your implementation should have
       
    81 explicit cases for the basic regular expressions, but also
       
    82 explicit cases for the extended regular expressions. That
       
    83 means do not treat the extended regular expressions by just
       
    84 translating them into the basic ones. See also Question 2,
       
    85 where you are asked to explicitly give the rules for
       
    86 \textit{nullable} and \textit{der} for the extended regular
       
    87 expressions.
       
    88 
   101 
    89 
   102 
    90 \subsection*{Question 1}
   103 \subsection*{Question 1}
    91 
   104 
    92 What is your King's email address (you will need it in
   105 What is your King's email address (you will need it in
    93 Question 3)?
   106 Question 4)?
    94 
   107 
    95 \subsection*{Question 2}
   108 \subsection*{Question 2}
    96 
   109 
    97 This question does not require any implementation. From the
   110 From the
    98 lectures you have seen the definitions for the functions
   111 lectures you have seen the definitions for the functions
    99 \textit{nullable} and \textit{der} for the basic regular
   112 \textit{nullable} and \textit{der} for the basic regular
   100 expressions. Give the rules for the extended regular
   113 expressions. Implement the rules for the extended regular
   101 expressions:
   114 expressions:
   102 
   115 
   103 \begin{center}
   116 \begin{center}
   104 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   117 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   105 $\textit{nullable}([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
   118   $\textit{nullable}([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
   106 $\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
   119   $\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
   107 $\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
   120   $\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
   108 $\textit{nullable}(r^{\{n,m\}})$            & $\dn$ & $?$\\
   121   $\textit{nullable}(r^{\{n\}})$              & $\dn$ & $?$\\
   109 $\textit{nullable}(\sim{}r)$               & $\dn$ & $?$\medskip\\
   122   $\textit{nullable}(r^{\{..m\}})$            & $\dn$ & $?$\\
   110 $der\, c\, ([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
   123   $\textit{nullable}(r^{\{n..\}})$            & $\dn$ & $?$\\
   111 $der\, c\, (r^+)$                   & $\dn$ & $?$\\
   124   $\textit{nullable}(r^{\{n..m\}})$           & $\dn$ & $?$\\
   112 $der\, c\, (r^?)$                   & $\dn$ & $?$\\
   125   $\textit{nullable}(\sim{}r)$              & $\dn$ & $?$
   113 $der\, c\, (r^{\{n,m\}})$            & $\dn$ & $?$\\
   126 \end{tabular}
   114 $der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
   127 \end{center}
       
   128 
       
   129 \begin{center}
       
   130 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   131   $der\, c\, ([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
       
   132   $der\, c\, (r^+)$                   & $\dn$ & $?$\\
       
   133   $der\, c\, (r^?)$                   & $\dn$ & $?$\\
       
   134   $der\, c\, (r^{\{n\}})$              & $\dn$ & $?$\\
       
   135   $der\, c\, (r^{\{..m\}})$           & $\dn$ & $?$\\
       
   136   $der\, c\, (r^{\{n..\}})$           & $\dn$ & $?$\\
       
   137   $der\, c\, (r^{\{n..m\}})$           & $\dn$ & $?$\\
       
   138   $der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
   115 \end{tabular}
   139 \end{tabular}
   116 \end{center}
   140 \end{center}
   117 
   141 
   118 \noindent
   142 \noindent
   119 Remember your definitions have to satisfy the two properties
   143 Remember your definitions have to satisfy the two properties
   121 \begin{itemize}
   145 \begin{itemize}
   122 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
   146 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
   123 \item $L(der\,c\,r)) = Der\,c\,(L(r))$
   147 \item $L(der\,c\,r)) = Der\,c\,(L(r))$
   124 \end{itemize}
   148 \end{itemize}
   125 
   149 
       
   150 \noindent
       
   151 Given the definitions of \textit{nullable} and \textit{der}, it is
       
   152 easy to implement a regular expression matcher.  Test your regular
       
   153 expression matcher with (at least) the examples:
       
   154 
       
   155 
       
   156 \begin{center}
       
   157 \def\arraystretch{1.2}  
       
   158 \begin{tabular}{r|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}}
       
   159   string & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ &
       
   160      $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$\\\hline
       
   161   $[]$           &&&&&& \\\hline 
       
   162   \texttt{a}     &&&&&& \\\hline 
       
   163   \texttt{aa}    &&&&&& \\\hline 
       
   164   \texttt{aaa}   &&&&&& \\\hline 
       
   165   \texttt{aaaaa} &&&&&& \\\hline 
       
   166   \texttt{aaaaaa}&&&&&& \\
       
   167 \end{tabular}
       
   168 \end{center}
       
   169 
       
   170 \noindent
       
   171 Does your matcher produce the expected results?
       
   172 
   126 \subsection*{Question 3}
   173 \subsection*{Question 3}
   127 
   174 
   128 Implement the following regular expression for email addresses
   175 As you can see, there are a number of explicit regular expressions
       
   176 that deal with single or several characters, for example:
       
   177 
       
   178 \begin{center}
       
   179 \begin{tabular}{ll}
       
   180   $c$ & matches a single character\\  
       
   181   $[c_1,c_2,\ldots,c_n]$ & matches a set of characters---for character ranges\\
       
   182   $\textit{ALL}$ & matches any character
       
   183 \end{tabular}
       
   184 \end{center}
       
   185 
       
   186 \noindent
       
   187 the latter is useful for matching any string (for example
       
   188 by using $\textit{ALL}^*$). In order to avoid having an explicit constructor
       
   189 for each case, we can generalise all these cases and introduce a single
       
   190 constructor $\textit{CFUN}(f)$ where $f$ is a function from characters
       
   191 to a boolean. The idea is that the function $f$ determines which character(s)
       
   192 are matched, namely those where $f$ returns \texttt{true}.
       
   193 In this question implement \textit{CFUN} and define
       
   194 
       
   195 \begin{center}
       
   196 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   197   $\textit{nullable}(\textit{CFUN}(f))$  & $\dn$ & $?$\\
       
   198   $\textit{der}\,c\,(\textit{CFUN}(f))$  & $\dn$ & $?$
       
   199 \end{tabular}
       
   200 \end{center}
       
   201 
       
   202 \noindent in your matcher and then also give definitions for
       
   203 
       
   204 \begin{center}
       
   205 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   206   $c$  & $\dn$ & $\textit{CFUN}(?)$\\
       
   207   $[c_1,c_2,\ldots,c_n]$  & $\dn$ & $\textit{CFUN}(?)$\\
       
   208   $\textit{ALL}$  & $\dn$ & $\textit{CFUN}(?)$
       
   209 \end{tabular}
       
   210 \end{center}
       
   211 
       
   212 
       
   213 \subsection*{Question 4}
       
   214 
       
   215 Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression
       
   216 
       
   217 \[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\]
       
   218 
       
   219 \noindent
       
   220 Define in your code the following regular expression for email addresses
   129 
   221 
   130 \[
   222 \[
   131 ([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
   223 ([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
   132 \]
   224 \]
   133 
   225 
   134 \noindent and calculate the derivative according to your email
   226 \noindent and calculate the derivative according to your email
   135 address. When calculating the derivative, simplify all regular
   227 address. When calculating the derivative, simplify all regular
   136 expressions as much as possible by applying the
   228 expressions as much as possible by applying the
   149 \end{center}
   241 \end{center}
   150 
   242 
   151 \noindent Write down your simplified derivative in a readable
   243 \noindent Write down your simplified derivative in a readable
   152 notation using parentheses where necessary. That means you
   244 notation using parentheses where necessary. That means you
   153 should use the infix notation $+$, $\cdot$, $^*$ and so on,
   245 should use the infix notation $+$, $\cdot$, $^*$ and so on,
   154 instead of code.
   246 instead of code.\bigskip
   155  
   247  
   156 \subsection*{Question 4}
   248 \noindent
   157 
   249 Implement the simplification rules in your regular expression matcher.
   158 Suppose \textit{[a-z]} stands for the range regular expression
   250 Consider the regular expression $/ \cdot * \cdot
   159 $[a,b,c,\ldots,z]$.  Consider the regular expression $/ \cdot * \cdot
   251 (\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot *
   160 (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot *
       
   161 \cdot /$ and decide wether the following four strings are matched by
   252 \cdot /$ and decide wether the following four strings are matched by
   162 this regular expression. Answer yes or no.
   253 this regular expression. Answer yes or no.
   163 
   254 
   164 \begin{enumerate}
   255 \begin{enumerate}
   165 \item \texttt{"/**/"}
   256 \item \texttt{"/**/"}
   167 \item \texttt{"/*test*/test*/"}
   258 \item \texttt{"/*test*/test*/"}
   168 \item \texttt{"/*test/*test*/"}
   259 \item \texttt{"/*test/*test*/"}
   169 \end{enumerate}
   260 \end{enumerate}
   170 
   261 
   171 \noindent
   262 \noindent
   172 Also test your regular expression matcher with the regular
   263 Also let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
   173 expression $a^{\{3,5\}}$ and the strings
       
   174 
       
   175 \begin{enumerate}
       
   176 \setcounter{enumi}{4}
       
   177 \item \texttt{aa}
       
   178 \item \texttt{aaa}
       
   179 \item \texttt{aaaaa}
       
   180 \item \texttt{aaaaaa}
       
   181 \end{enumerate}
       
   182 
       
   183 \noindent
       
   184 Does your matcher produce the expected results?
       
   185 
       
   186 \subsection*{Question 5}
       
   187 
       
   188 Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
       
   189 $(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
   264 $(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
   190 strings consisting of $a$s only can be matched by $(r_1^+)^+$.
   265 strings consisting of $a$s only can be matched by $(r_1^+)^+$.
   191 Similarly test them with $(r_2^+)^+$. Again answer in all six cases
   266 Similarly test them with $(r_2^+)^+$. Again answer in all six cases
   192 with yes or no. \medskip
   267 with yes or no. \medskip
   193 
   268 
   195 These are strings are meant to be entirely made up of $a$s. Be careful
   270 These are strings are meant to be entirely made up of $a$s. Be careful
   196 when copy-and-pasting the strings so as to not forgetting any $a$ and
   271 when copy-and-pasting the strings so as to not forgetting any $a$ and
   197 to not introducing any other character.
   272 to not introducing any other character.
   198 
   273 
   199 \begin{enumerate}
   274 \begin{enumerate}
       
   275 \setcounter{enumi}{4}
   200 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   276 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   201 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   277 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   202 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   278 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   203 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   279 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   204 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   280 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   207 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   283 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   208 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   284 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   209 \end{enumerate}
   285 \end{enumerate}
   210 
   286 
   211 
   287 
       
   288 
   212 \end{document}
   289 \end{document}
   213 
   290 
   214 %%% Local Variables: 
   291 %%% Local Variables: 
   215 %%% mode: latex
   292 %%% mode: latex
   216 %%% TeX-master: t
   293 %%% TeX-master: t