cws/cw03.tex
author Christian Urban <urbanc@in.tum.de>
Tue, 22 Nov 2016 01:35:40 +0000
changeset 65 f59e70e2ad82
parent 62 2151c77e1e24
child 68 8da9e0c16194
permissions -rw-r--r--
updated

\documentclass{article}
\usepackage{../style}
%%\usepackage{../langs}

\begin{document}

\section*{Coursework 8 (Scala, Regular Expressions}

This coursework is worth 10\% and is due on XXXX at
16:00. You are asked to implement a regular expression matcher.

Make sure the files
you submit can be processed by just calling \texttt{scala
  <<filename.scala>>}.\bigskip 

\noindent
\textbf{Important:} Do not use any mutable data structures in your
submissions! They are not needed. This excluded the use of
\texttt{ListBuffer}s, for example. Do not use \texttt{return} in your
code! It has a different meaning in Scala, than in Java.
Do not use \texttt{var}! This declares a mutable variable. Feel free to
copy any code you need from files \texttt{knight1.scala},
\texttt{knight2.scala} and \texttt{knight3.scala}. Make sure the
functions you submit are defined on the ``top-level'' of Scala, not
inside a class or object.


\subsection*{Disclaimer}

It should be understood that the work you submit represents
your own effort. You have not copied from anyone else. An
exception is the Scala code I showed during the lectures or
uploaded to KEATS, which you can freely use.\bigskip


\subsubsection*{Task}

The task is to implement a regular expression matcher based on
derivatives of regular expressions. The implementation should
be able to deal with the usual (basic) regular expressions

\begin{center}
\begin{tabular}{lcll}
  $r$ & $::=$ & $\ZERO$     & cannot match anything\\
      &   $|$ & $\ONE$      & can only match the empty string\\
      &   $|$ & $c$         & can match a character $c$\\
      &   $|$ & $r_1 + r_2$ & can match either with $r_1$ or with $r_2$\\
      &   $|$ & $r_1 \cdot r_2$ & can match first with $r_1$ and then with $r_2$\\
      &   $|$ & $r^*$       & can match zero or more times $r$\\
      &   $|$ & $r^{\{\uparrow n\}}$ & can match zero upto $n$ times $r$\\
      &   $|$ & $r^{\{n\}}$ & can match exactly $n$ times $r$\\
\end{tabular}
\end{center}

\noindent
Implement a function called \textit{nullable} by recursion over
regular expressions:

\begin{center}
\begin{tabular}{lcl}
$\textit{nullable}(\ZERO)$ & $\dn$ & $\textit{false}$\\
$\textit{nullable}(\ONE)$  & $\dn$ & $\textit{true}$\\
$\textit{nullable}(c)$     & $\dn$ & $\textit{false}$\\
$\textit{nullable}(r_1 + r_2)$ & $\dn$ & $\textit{nullable}(r_1) \vee \textit{nullable}(r_2)$\\
$\textit{nullable}(r_1 \cdot r_2)$ & $\dn$ & $\textit{nullable}(r_1) \wedge \textit{nullable}(r_2)$\\
$\textit{nullable}(r^*)$ & $\dn$ & $\textit{true}$\\
$\textit{nullable}(r^{\{\uparrow n\}})$ & $\dn$ & $\textit{true}$\\
$\textit{nullable}(r^{\{n\}})$ & $\dn$ & 
   $\textit{if}\;n = 0\; \textit{then} \; \textit{true} \; \textit{else} \; \textit{nullable}(r)$\\
\end{tabular}
\end{center}

\begin{center}
\begin{tabular}{lcl}
$\textit{der}\;c\;(\ZERO)$ & $\dn$ & $\ZERO$\\
$\textit{der}\;c\;(\ONE)$  & $\dn$ & $\ZERO$\\
$\textit{der}\;c\;(d)$     & $\dn$ & $\textit{if}\; c = d\;\textit{then} \;\ONE \; \textit{else} \;\ZERO$\\
$\textit{der}\;c\;(r_1 + r_2)$ & $\dn$ & $(\textit{der}\;c\;r_1) + (\textit{der}\;c\;r_2)$\\
$\textit{der}\;c\;(r_1 \cdot r_2)$ & $\dn$ & $\textit{if}\;\textit{nullable}(r_1)$\\
      & & $\textit{then}\;((\textit{der}\;c\;r_1)\cdot r_2) + (\textit{der}\;c\;r_2)$\\
      & & $\textit{else}\;(\textit{der}\;c\;r_1)\cdot r_2$\\
$\textit{der}\;c\;(r^*)$ & $\dn$ & $(\textit{der}\;c\;r)\cdot (r^*)$\\
$\textit{der}\;c\;(r^{\{\uparrow n\}})$ & $\dn$ & $\textit{if}\;n = 0\;\textit{then}\;\ZERO\;\text{else}\;
   (\textit{der}\;c\;r)\cdot (r^{\{\uparrow n-1\}})$\\
$\textit{der}\;c\;(r^{\{n\}})$ & $\dn$ & 
   $\textit{if}\;n = 0\; \textit{then} \; \ZERO\; \textit{else}\;$\\
   & & $\textit{if} \;\textit{nullable}(r)\;\textit{then}\;(\textit{der}\;c\;r)\cdot (r^{\{\uparrow n-1\}})$\\
   & & $\textit{else}\;(\textit{der}\;c\;r)\cdot (r^{\{n-1\}})$
\end{tabular}
\end{center}


Be careful that your implementation of \textit{nullable} and
\textit{der}\;c\; satisfies for every $r$ the following two
properties (see also Question 2):

\begin{itemize}
\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
\item $L(der\,c\,r) = Der\,c\,(L(r))$
\end{itemize}

\noindent {\bf Important!} Your implementation should have
explicit cases for the basic regular expressions, but also
explicit cases for the extended regular expressions. That
means do not treat the extended regular expressions by just
translating them into the basic ones. See also Question 2,
where you are asked to explicitly give the rules for
\textit{nullable} and \textit{der}\;c\; for the extended regular
expressions.


\subsection*{Question 1}

What is your King's email address (you will need it in
Question 3)?

\subsection*{Question 2}

This question does not require any implementation. From the
lectures you have seen the definitions for the functions
\textit{nullable} and \textit{der}\;c\; for the basic regular
expressions. Give the rules for the extended regular
expressions:

\begin{center}
\begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {}}
$\textit{nullable}([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
$\textit{nullable}(r^+)$                   & $\dn$ & $?$\\
$\textit{nullable}(r^?)$                   & $\dn$ & $?$\\
$\textit{nullable}(r^{\{n,m\}})$            & $\dn$ & $?$\\
$\textit{nullable}(\sim{}r)$               & $\dn$ & $?$\medskip\\
$der\, c\, ([c_1 c_2 \ldots c_n])$  & $\dn$ & $?$\\
$der\, c\, (r^+)$                   & $\dn$ & $?$\\
$der\, c\, (r^?)$                   & $\dn$ & $?$\\
$der\, c\, (r^{\{n,m\}})$            & $\dn$ & $?$\\
$der\, c\, (\sim{}r)$               & $\dn$ & $?$\\
\end{tabular}
\end{center}

\noindent
Remember your definitions have to satisfy the two properties

\begin{itemize}
\item $\textit{nullable}(r)$ if and only if $[]\in L(r)$
\item $L(der\,c\,r)) = Der\,c\,(L(r))$
\end{itemize}

\subsection*{Question 3}

Implement the following regular expression for email addresses

\[
([a\mbox{-}z0\mbox{-}9\_\!\_\,.-]^+)\cdot @\cdot ([a\mbox{-}z0\mbox{-}9\,.-]^+)\cdot .\cdot ([a\mbox{-}z\,.]^{\{2,6\}})
\]

\noindent and calculate the derivative according to your email
address. When calculating the derivative, simplify all regular
expressions as much as possible by applying the
following 7 simplification rules:

\begin{center}
\begin{tabular}{l@{\hspace{2mm}}c@{\hspace{2mm}}ll}
$r \cdot \ZERO$ & $\mapsto$ & $\ZERO$\\ 
$\ZERO \cdot r$ & $\mapsto$ & $\ZERO$\\ 
$r \cdot \ONE$ & $\mapsto$ & $r$\\ 
$\ONE \cdot r$ & $\mapsto$ & $r$\\ 
$r + \ZERO$ & $\mapsto$ & $r$\\ 
$\ZERO + r$ & $\mapsto$ & $r$\\ 
$r + r$ & $\mapsto$ & $r$\\ 
\end{tabular}
\end{center}

\noindent Write down your simplified derivative in a readable
notation using parentheses where necessary. That means you
should use the infix notation $+$, $\cdot$, $^*$ and so on,
instead of code.
 
\subsection*{Question 4}

Suppose \textit{[a-z]} stands for the range regular expression
$[a,b,c,\ldots,z]$.  Consider the regular expression $/ \cdot * \cdot
(\sim{}([a\mbox{-}z]^* \cdot * \cdot / \cdot [a\mbox{-}z]^*)) \cdot *
\cdot /$ and decide wether the following four strings are matched by
this regular expression. Answer yes or no.

\begin{enumerate}
\item \texttt{"/**/"}
\item \texttt{"/*foobar*/"}
\item \texttt{"/*test*/test*/"}
\item \texttt{"/*test/*test*/"}
\end{enumerate}

\noindent
Also test your regular expression matcher with the regular
expression $a^{\{3,5\}}$ and the strings

\begin{enumerate}
\setcounter{enumi}{4}
\item \texttt{aa}
\item \texttt{aaa}
\item \texttt{aaaaa}
\item \texttt{aaaaaa}
\end{enumerate}

\noindent
Does your matcher produce the expected results?

\subsection*{Question 5}

Let $r_1$ be the regular expression $a\cdot a\cdot a$ and $r_2$ be
$(a^{\{19,19\}}) \cdot (a^?)$.  Decide whether the following three
strings consisting of $a$s only can be matched by $(r_1^+)^+$.
Similarly test them with $(r_2^+)^+$. Again answer in all six cases
with yes or no. \medskip

\noindent
These are strings are meant to be entirely made up of $a$s. Be careful
when copy-and-pasting the strings so as to not forgetting any $a$ and
to not introducing any other character.

\begin{enumerate}
\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
\item \texttt{"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\ 
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"}
\end{enumerate}


\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End: