coursework/cw01.tex
changeset 492 39b7ff2cf1bc
parent 473 dc528091eb70
child 494 d0fc671bcbbf
equal deleted inserted replaced
491:d5776c6018f0 492:39b7ff2cf1bc
     1 \documentclass{article}
     1 \documentclass{article}
     2 \usepackage{../style}
     2 \usepackage{../style}
     3 \usepackage{../langs}
     3 \usepackage{../langs}
     4 
     4 
       
     5 \usepackage{array}
       
     6 
       
     7 
     5 \begin{document}
     8 \begin{document}
       
     9 \newcolumntype{C}[1]{>{\centering}m{#1}}
     6 
    10 
     7 \section*{Coursework 1 (Strand 1)}
    11 \section*{Coursework 1 (Strand 1)}
     8 
    12 
     9 This coursework is worth 4\% and is due on 25 October at
    13 This coursework is worth 4\% and is due on 25 October at
    10 16:00. You are asked to implement a regular expression matcher
    14 16:00. You are asked to implement a regular expression matcher
    23 your own effort. You have not copied from anyone else. An
    27 your own effort. You have not copied from anyone else. An
    24 exception is the Scala code I showed during the lectures or
    28 exception is the Scala code I showed during the lectures or
    25 uploaded to KEATS, which you can freely use.\bigskip
    29 uploaded to KEATS, which you can freely use.\bigskip
    26 
    30 
    27 
    31 
    28 \subsubsection*{Task}
    32 \subsection*{Task}
    29 
    33 
    30 The task is to implement a regular expression matcher based on
    34 The task is to implement a regular expression matcher based on
    31 derivatives of regular expressions. The implementation should
    35 derivatives of regular expressions. The implementation should
    32 be able to deal with the usual (basic) regular expressions
    36 be able to deal with the usual (basic) regular expressions
    33 
    37 
    38 \noindent
    42 \noindent
    39 but also with the following extended regular expressions:
    43 but also with the following extended regular expressions:
    40 
    44 
    41 \begin{center}
    45 \begin{center}
    42 \begin{tabular}{ll}
    46 \begin{tabular}{ll}
    43 $[c_1 c_2 \ldots c_n]$ & a range of characters\\
    47   $[c_1,c_2,\ldots,c_n]$ & a set of characters---for character ranges\\
    44 $r^+$ & one or more times $r$\\
    48   $r^+$ & one or more times $r$\\
    45 $r^?$ & optional $r$\\
    49   $r^?$ & optional $r$\\
    46 $r^{\{n,m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
    50   $r^{\{n\}}$ & exactly $n$-times\\
    47 $\sim{}r$ & not-regular expression of $r$\\
    51   $r^{\{..m\}}$ & zero or more times $r$ but no more than $m$-times\\
    48 \end{tabular}
    52   $r^{\{n..\}}$ & at least $n$-times $r$\\
    49 \end{center}
    53   $r^{\{n..m\}}$ & at least $n$-times $r$ but no more than $m$-times\\
    50 
    54   $\sim{}r$ & not-regular-expression of $r$\\
    51 \noindent In the case of $r^{\{n,m\}}$ you can assume the
    55 \end{tabular}
    52 convention that $0 \le n \le m$. The meanings of the extended
    56 \end{center}
    53 regular expressions are
    57 
       
    58 \noindent You can assume that $n$ and $m$ are greater or equal than
       
    59 $0$. In the case of $r^{\{n,m\}}$ you can also assume $0 \le n \le m$.\bigskip
       
    60 
       
    61 \noindent {\bf Important!} Your implementation should have explicit
       
    62 cases for the basic regular expressions, but also explicit cases for
       
    63 the extended regular expressions. That means do not treat the extended
       
    64 regular expressions by just translating them into the basic ones. See
       
    65 also Question 2, where you are asked to explicitly give the rules for
       
    66 \textit{nullable} and \textit{der} for the extended regular
       
    67 expressions.\newpage
       
    68 
       
    69 \noindent
       
    70 The meanings of the extended regular expressions are
    54 
    71 
    55 \begin{center}
    72 \begin{center}
    56 \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
    73 \begin{tabular}{r@{\hspace{2mm}}c@{\hspace{2mm}}l}
    57 $L([c_1 c_2 \ldots c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
    74   $L([c_1,c_2,\ldots,c_n])$ & $\dn$ & $\{[c_1], [c_2], \ldots, [c_n]\}$\\ 
    58 $L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}. L(r)^i$\\
    75   $L(r^+)$                  & $\dn$ & $\bigcup_{1\le i}.\;L(r)^i$\\
    59 $L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
    76   $L(r^?)$                  & $\dn$ & $L(r) \cup \{[]\}$\\
    60 $L(r^{\{n,m\}})$           & $\dn$ & $\bigcup_{n\le i \le m}. L(r)^i$\\
    77   $L(r^{\{n\}})$             & $\dn$ & $L(r)^n$\\
    61 $L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
    78   $L(r^{\{..m\}})$           & $\dn$ & $\bigcup_{0\le i \le m}.\;L(r)^i$\\
       
    79   $L(r^{\{n..\}})$           & $\dn$ & $\bigcup_{n\le i}.\;L(r)^i$\\
       
    80   $L(r^{\{n..m\}})$          & $\dn$ & $\bigcup_{n\le i \le m}.\;L(r)^i$\\
       
    81   $L(\sim{}r)$              & $\dn$ & $\Sigma^* - L(r)$
    62 \end{tabular}
    82 \end{tabular}
    63 \end{center}
    83 \end{center}
    64 
    84 
    65 \noindent whereby in the last clause the set $\Sigma^*$ stands
    85 \noindent whereby in the last clause the set $\Sigma^*$ stands
    66 for the set of \emph{all} strings over the alphabet $\Sigma$
    86 for the set of \emph{all} strings over the alphabet $\Sigma$
    67 (in the implementation the alphabet can be just what is
    87 (in the implementation the alphabet can be just what is
    68 represented by, say, the type \pcode{Char}). So $\sim{}r$
    88 represented by, say, the type \pcode{Char}). So $\sim{}r$
    69 means `all the strings that $r$ cannot match'. 
    89 means in effect ``all the strings that $r$ cannot match''.\medskip 
    70 
    90 
       
    91 \noindent
    71 Be careful that your implementation of \textit{nullable} and
    92 Be careful that your implementation of \textit{nullable} and
    72 \textit{der} satisfies for every $r$ the following two
    93 \textit{der} satisfies for every regular expression $r$ the following
    73 properties (see also Question 2):
    94 two properties (see also Question 2):
    74 
    95 
    75 \begin{itemize}
    96 \begin{itemize}
    76 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
    97 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
    77 \item $L(der\,c\,r) = Der\,c\,(L(r))$
    98 \item $L(der\,c\,r) = Der\,c\,(L(r))$
    78 \end{itemize}
    99 \end{itemize}
    79 
   100 
    80 \noindent {\bf Important!} Your implementation should have
       
    81 explicit cases for the basic regular expressions, but also
       
    82 explicit cases for the extended regular expressions. That
       
    83 means do not treat the extended regular expressions by just
       
    84 translating them into the basic ones. See also Question 2,
       
    85 where you are asked to explicitly give the rules for
       
    86 \textit{nullable} and \textit{der} for the extended regular
       
    87 expressions.
       
    88 
   101 
    89 
   102 
    90 \subsection*{Question 1}
   103 \subsection*{Question 1}
    91 
   104 
    92 What is your King's email address (you will need it in
   105 What is your King's email address (you will need it in
    93 Question 3)?
   106 Question 4)?
    94 
   107 
    95 \subsection*{Question 2}
   108 \subsection*{Question 2}
    96 
   109 
    97 From the
   110 From the
    98 lectures you have seen the definitions for the functions
   111 lectures you have seen the definitions for the functions
   100 expressions. Implement the rules for the extended regular
   113 expressions. Implement the rules for the extended regular
   101 expressions:
   114 expressions:
   102 
   115 
   103 \begin{center}
   116 \begin{center}
   104 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   117 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
   105 $\textit{nullable}([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
   118   $\textit{nullable}([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
   106 $\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
   119   $\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
   107 $\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
   120   $\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
   108 $\textit{nullable}(r^{\{n,m\}})$            & $\dn$ & $?$\\
   121   $\textit{nullable}(r^{\{n\}})$              & $\dn$ & $?$\\
   109 $\textit{nullable}(\sim{}r)$               & $\dn$ & $?$\medskip\\
   122   $\textit{nullable}(r^{\{..m\}})$            & $\dn$ & $?$\\
   110 $der\, c\, ([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
   123   $\textit{nullable}(r^{\{n..\}})$            & $\dn$ & $?$\\
   111 $der\, c\, (r^+)$                   & $\dn$ & $?$\\
   124   $\textit{nullable}(r^{\{n..m\}})$           & $\dn$ & $?$\\
   112 $der\, c\, (r^?)$                   & $\dn$ & $?$\\
   125   $\textit{nullable}(\sim{}r)$              & $\dn$ & $?$
   113 $der\, c\, (r^{\{n,m\}})$            & $\dn$ & $?$\\
   126 \end{tabular}
   114 $der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
   127 \end{center}
       
   128 
       
   129 \begin{center}
       
   130 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   131   $der\, c\, ([c_1,c_2,\ldots,c_n])$  & $\dn$ & $?$\\
       
   132   $der\, c\, (r^+)$                   & $\dn$ & $?$\\
       
   133   $der\, c\, (r^?)$                   & $\dn$ & $?$\\
       
   134   $der\, c\, (r^{\{n\}})$              & $\dn$ & $?$\\
       
   135   $der\, c\, (r^{\{..m\}})$           & $\dn$ & $?$\\
       
   136   $der\, c\, (r^{\{n..\}})$           & $\dn$ & $?$\\
       
   137   $der\, c\, (r^{\{n..m\}})$           & $\dn$ & $?$\\
       
   138   $der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
   115 \end{tabular}
   139 \end{tabular}
   116 \end{center}
   140 \end{center}
   117 
   141 
   118 \noindent
   142 \noindent
   119 Remember your definitions have to satisfy the two properties
   143 Remember your definitions have to satisfy the two properties
   122 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
   146 \item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
   123 \item $L(der\,c\,r)) = Der\,c\,(L(r))$
   147 \item $L(der\,c\,r)) = Der\,c\,(L(r))$
   124 \end{itemize}
   148 \end{itemize}
   125 
   149 
   126 \noindent
   150 \noindent
   127 Give the implementation and the text-version of the clauses above.
   151 Given the definitions of \textit{nullable} and \textit{der}, it is
       
   152 easy to implement a regular expression matcher.  Test your regular
       
   153 expression matcher with (at least) the examples:
       
   154 
       
   155 
       
   156 \begin{center}
       
   157 \def\arraystretch{1.2}  
       
   158 \begin{tabular}{r|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}|m{12mm}}
       
   159   string & $a^{\{3\}}$ & $(a^?)^{\{3\}}$ & $a^{\{..3\}}$ &
       
   160      $(a^?)^{\{..3\}}$ & $a^{\{3..5\}}$ & $(a^?)^{\{3..5\}}$\\\hline
       
   161   $[]$           &&&&&& \\\hline 
       
   162   \texttt{a}     &&&&&& \\\hline 
       
   163   \texttt{aa}    &&&&&& \\\hline 
       
   164   \texttt{aaa}   &&&&&& \\\hline 
       
   165   \texttt{aaaaa} &&&&&& \\\hline 
       
   166   \texttt{aaaaaa}&&&&&& \\
       
   167 \end{tabular}
       
   168 \end{center}
       
   169 
       
   170 \noindent
       
   171 Does your matcher produce the expected results?
   128 
   172 
   129 \subsection*{Question 3}
   173 \subsection*{Question 3}
   130 
   174 
   131 Implement the following regular expression for email addresses
   175 There are a number of explicit regular expressions that deal with single or several
       
   176 characters, for example:
       
   177 
       
   178 \begin{center}
       
   179 \begin{tabular}{ll}
       
   180   $c$ & match a single character\\  
       
   181   $[c_1,c_2,\ldots,c_n]$ & match a set of characters---for character ranges\\
       
   182   $\textit{ALL}$ & match any character
       
   183 \end{tabular}
       
   184 \end{center}
       
   185 
       
   186 \noindent
       
   187 the latter is useful for matching any string (for example
       
   188 $\textit{ALL}^*$). In order to avoid having an explicit constructor
       
   189 for each case, we can generalise all such cases and introduce a single
       
   190 constructor $\textit{CFUN}(f)$ where $f$ is a function from characters
       
   191 to a boolean. Implement
       
   192 
       
   193 \begin{center}
       
   194 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   195   $\textit{nullable}(\textit{CFUN}(f))$  & $\dn$ & $?$\\
       
   196   $\textit{der}\,c\,(\textit{CFUN}(f))$  & $\dn$ & $?$
       
   197 \end{tabular}
       
   198 \end{center}
       
   199 
       
   200 \noindent in your matcher and then give definitions for
       
   201 
       
   202 \begin{center}
       
   203 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
       
   204   $c$  & $\dn$ & $\textit{CFUN}(?)$\\
       
   205   $[c_1,c_2,\ldots,c_n]$  & $\dn$ & $\textit{CFUN}(?)$\\
       
   206   $\textit{ALL}$  & $\dn$ & $\textit{CFUN}(?)$
       
   207 \end{tabular}
       
   208 \end{center}
       
   209 
       
   210 
       
   211 \subsection*{Question 4}
       
   212 
       
   213 Suppose $[a\mbox{-}z0\mbox{-}9\_\,.\mbox{-}]$ stands for the regular expression
       
   214 
       
   215 \[[a,b,c,\ldots,z,0,\dots,9,\_,.,\mbox{-}]\;.\]
       
   216 
       
   217 \noindent
       
   218 Define in your code the following regular expression for email addresses
   132 
   219 
   133 \[
   220 \[
   134 ([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
   221 ([a\mbox{-}z0\mbox{-}9\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
   135 \]
   222 \]
   136 
   223 
   137 \noindent and calculate the derivative according to your email
   224 \noindent and calculate the derivative according to your email
   138 address. When calculating the derivative, simplify all regular
   225 address. When calculating the derivative, simplify all regular
   139 expressions as much as possible by applying the
   226 expressions as much as possible by applying the
   152 \end{center}
   239 \end{center}
   153 
   240 
   154 \noindent Write down your simplified derivative in a readable
   241 \noindent Write down your simplified derivative in a readable
   155 notation using parentheses where necessary. That means you
   242 notation using parentheses where necessary. That means you
   156 should use the infix notation $+$, $\cdot$, $^*$ and so on,
   243 should use the infix notation $+$, $\cdot$, $^*$ and so on,
   157 instead of code.
   244 instead of code.\bigskip
   158  
   245  
   159 \subsection*{Question 4}
   246 \noindent
   160 
   247 Implement the simplification rules in your regular expression matcher.
   161 Suppose \textit{[a-z]} stands for the range regular expression
   248 Consider the regular expression $/ \cdot * \cdot
   162 $[a,b,c,\ldots,z]$.  Consider the regular expression $/ \cdot * \cdot
   249 (\sim{}(\textit{ALL}^* \cdot * \cdot / \cdot \textit{ALL}^*)) \cdot *
   163 (\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot *
       
   164 \cdot /$ and decide wether the following four strings are matched by
   250 \cdot /$ and decide wether the following four strings are matched by
   165 this regular expression. Answer yes or no.
   251 this regular expression. Answer yes or no.
   166 
   252 
   167 \begin{enumerate}
   253 \begin{enumerate}
   168 \item \texttt{"/**/"}
   254 \item \texttt{"/**/"}
   170 \item \texttt{"/*test*/test*/"}
   256 \item \texttt{"/*test*/test*/"}
   171 \item \texttt{"/*test/*test*/"}
   257 \item \texttt{"/*test/*test*/"}
   172 \end{enumerate}
   258 \end{enumerate}
   173 
   259 
   174 \noindent
   260 \noindent
   175 Also test your regular expression matcher with the regular
   261 Also let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
   176 expressions $a^{\{3,5\}}$ and $(a^?)^{\{3,5\}}$. Test whether the
       
   177 strings
       
   178 
       
   179 \begin{enumerate}
       
   180 \setcounter{enumi}{4}
       
   181 \item \texttt{aa}
       
   182 \item \texttt{aaa}
       
   183 \item \texttt{aaaaa}
       
   184 \item \texttt{aaaaaa}
       
   185 \end{enumerate}
       
   186 
       
   187 \noindent
       
   188 are matched or not. Does your matcher produce the expected results?
       
   189 
       
   190 \subsection*{Question 5}
       
   191 
       
   192 Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
       
   193 $(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
   262 $(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
   194 strings consisting of $a$s only can be matched by $(r_1^+)^+$.
   263 strings consisting of $a$s only can be matched by $(r_1^+)^+$.
   195 Similarly test them with $(r_2^+)^+$. Again answer in all six cases
   264 Similarly test them with $(r_2^+)^+$. Again answer in all six cases
   196 with yes or no. \medskip
   265 with yes or no. \medskip
   197 
   266 
   199 These are strings are meant to be entirely made up of $a$s. Be careful
   268 These are strings are meant to be entirely made up of $a$s. Be careful
   200 when copy-and-pasting the strings so as to not forgetting any $a$ and
   269 when copy-and-pasting the strings so as to not forgetting any $a$ and
   201 to not introducing any other character.
   270 to not introducing any other character.
   202 
   271 
   203 \begin{enumerate}
   272 \begin{enumerate}
       
   273 \setcounter{enumi}{4}
   204 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   274 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   205 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   275 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
   206 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   276 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   207 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   277 \item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   208 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   278 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   211 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   281 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
   212 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   282 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
   213 \end{enumerate}
   283 \end{enumerate}
   214 
   284 
   215 
   285 
       
   286 
   216 \end{document}
   287 \end{document}
   217 
   288 
   218 %%% Local Variables: 
   289 %%% Local Variables: 
   219 %%% mode: latex
   290 %%% mode: latex
   220 %%% TeX-master: t
   291 %%% TeX-master: t