diff -r c1e6123d02f4 -r aae256985251 cws/cw03.tex --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cws/cw03.tex Thu Nov 03 00:53:53 2016 +0000 @@ -0,0 +1,226 @@ +\documentclass{article} +\usepackage{style} +%%\usepackage{../langs} + +\begin{document} + +\section*{Coursework 3} + +This coursework is worth XXX\% and is due on XXXX at +16:00. You are asked to implement a regular expression matcher. + + + + +\subsubsection*{Disclaimer} + +It should be understood that the work you submit represents +your own effort. You have not copied from anyone else. An +exception is the Scala code I showed during the lectures or +uploaded to KEATS, which you can freely use.\bigskip + + +\subsubsection*{Task} + +The task is to implement a regular expression matcher based on +derivatives of regular expressions. The implementation should +be able to deal with the usual (basic) regular expressions + +\begin{center} +\begin{tabular}{lcll} + $r$ & $::=$ & $\ZERO$ & cannot match anything\\ + & $|$ & $\ONE$ & can only match the empty string\\ + & $|$ & $c$ & can match a character $c$\\ + & $|$ & $r_1 + r_2$ & can match either with $r_1$ or with $r_2$\\ + & $|$ & $r_1 \cdot r_2$ & can match first with $r_1$ and then with $r_2$\\ + & $|$ & $r^*$ & can match zero or more times $r$\\ + & $|$ & $r^{\{\uparrow n\}}$ & can match zero upto $n$ times $r$\\ + & $|$ & $r^{\{n\}}$ & can match exactly $n$ times $r$\\ +\end{tabular} +\end{center} + +\noindent +Implement a function called \textit{nullable} by recursion over +regular expressions: + +\begin{center} +\begin{tabular}{lcl} +$\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\ +$\textit{nullable}(\ONE)$ & $\dn$ & $\textit{true}$\\ +$\textit{nullable}(c)$ & $\dn$ & $\textit{false}$\\ +$\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\ +$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\ +$\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\ +$\textit{nullable}(r^{\{\uparrow n\}})$ & $\dn$ & $\textit{true}$\\ +$\textit{nullable}(r^{\{n\}})$ & $\dn$ & + $\textit{if}\;n = 0\; \textit{then} \; \textit{true} \; \textit{else} \; \textit{nullable}(r)$\\ +\end{tabular} +\end{center} + +\begin{center} +\begin{tabular}{lcl} +$\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\ +$\textit{der}\;c\;(\ONE)$ & $\dn$ & $\ZERO$\\ +$\textit{der}\;c\;(d)$ & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\ +$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\ +$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\ + & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\ + & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\ +$\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\ +$\textit{der}\;c\;(r^{\{\uparrow n\}})$ & $\dn$ & $\textit{if}\;n = 0\;\textit{then}\;\ZERO\;\text{else}\; + (\textit{der}\;c\;r)\cdot (r^{\{\uparrow n-1\}})$\\ +$\textit{der}\;c\;(r^{\{n\}})$ & $\dn$ & + $\textit{if}\;n = 0\; \textit{then} \; \ZERO\; \textit{else}\;$\\ + & & $\textit{if} \;\textit{nullable}(r)\;\textit{then}\;(\textit{der}\;c\;r)\cdot (r^{\{\uparrow n-1\}})$\\ + & & $\textit{else}\;(\textit{der}\;c\;r)\cdot (r^{\{n-1\}})$ +\end{tabular} +\end{center} + + +Be careful that your implementation of \textit{nullable} and +\textit{der}\;c\; satisfies for every $r$ the following two +properties (see also Question 2): + +\begin{itemize} +\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ +\item $L(der\,c\,r) = Der\,c\,(L(r))$ +\end{itemize} + +\noindent {\bf Important!} Your implementation should have +explicit cases for the basic regular expressions, but also +explicit cases for the extended regular expressions. That +means do not treat the extended regular expressions by just +translating them into the basic ones. See also Question 2, +where you are asked to explicitly give the rules for +\textit{nullable} and \textit{der}\;c\; for the extended regular +expressions. + + +\subsection*{Question 1} + +What is your King's email address (you will need it in +Question 3)? + +\subsection*{Question 2} + +This question does not require any implementation. From the +lectures you have seen the definitions for the functions +\textit{nullable} and \textit{der}\;c\; for the basic regular +expressions. Give the rules for the extended regular +expressions: + +\begin{center} +\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}} +$\textit{nullable}([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\ +$\textit{nullable}(r^+)$ & $\dn$ & $?$\\ +$\textit{nullable}(r^?)$ & $\dn$ & $?$\\ +$\textit{nullable}(r^{\{n,m\}})$ & $\dn$ & $?$\\ +$\textit{nullable}(\sim{}r)$ & $\dn$ & $?$\medskip\\ +$der\, c\, ([c_1 c_2 \ldots c_n])$ & $\dn$ & $?$\\ +$der\, c\, (r^+)$ & $\dn$ & $?$\\ +$der\, c\, (r^?)$ & $\dn$ & $?$\\ +$der\, c\, (r^{\{n,m\}})$ & $\dn$ & $?$\\ +$der\, c\, (\sim{}r)$ & $\dn$ & $?$\\ +\end{tabular} +\end{center} + +\noindent +Remember your definitions have to satisfy the two properties + +\begin{itemize} +\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$ +\item $L(der\,c\,r)) = Der\,c\,(L(r))$ +\end{itemize} + +\subsection*{Question 3} + +Implement the following regular expression for email addresses + +\[ +([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}}) +\] + +\noindent and calculate the derivative according to your email +address. When calculating the derivative, simplify all regular +expressions as much as possible by applying the +following 7 simplification rules: + +\begin{center} +\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}ll} +$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ +$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ +$r \cdot \ONE$ & $\mapsto$ & $r$\\ +$\ONE \cdot r$ & $\mapsto$ & $r$\\ +$r + \ZERO$ & $\mapsto$ & $r$\\ +$\ZERO + r$ & $\mapsto$ & $r$\\ +$r + r$ & $\mapsto$ & $r$\\ +\end{tabular} +\end{center} + +\noindent Write down your simplified derivative in a readable +notation using parentheses where necessary. That means you +should use the infix notation $+$, $\cdot$, $^*$ and so on, +instead of code. + +\subsection*{Question 4} + +Suppose \textit{[a-z]} stands for the range regular expression +$[a,b,c,\ldots,z]$. Consider the regular expression $/ \cdot * \cdot +(\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot * +\cdot /$ and decide wether the following four strings are matched by +this regular expression. Answer yes or no. + +\begin{enumerate} +\item \texttt{"/**/"} +\item \texttt{"/*foobar*/"} +\item \texttt{"/*test*/test*/"} +\item \texttt{"/*test/*test*/"} +\end{enumerate} + +\noindent +Also test your regular expression matcher with the regular +expression $a^{\{3,5\}}$ and the strings + +\begin{enumerate} +\setcounter{enumi}{4} +\item \texttt{aa} +\item \texttt{aaa} +\item \texttt{aaaaa} +\item \texttt{aaaaaa} +\end{enumerate} + +\noindent +Does your matcher produce the expected results? + +\subsection*{Question 5} + +Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be +$(a^{\{19,19\}}) \cdot (a^?)$. Decide whether the following three +strings consisting of $a$s only can be matched by $(r_1^+)^+$. +Similarly test them with $(r_2^+)^+$. Again answer in all six cases +with yes or no. \medskip + +\noindent +These are strings are meant to be entirely made up of $a$s. Be careful +when copy-and-pasting the strings so as to not forgetting any $a$ and +to not introducing any other character. + +\begin{enumerate} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} +\end{enumerate} + + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: