afl-material: hws/hw04.tex@f4df090a84d0


\documentclass{article}
\usepackage{../style}
\usepackage{../graphics}

\begin{document}

\section*{Homework 4}

\HEADER

\begin{enumerate}

\item Given the regular expressions

\begin{center}
\begin{tabular}{ll}    
  1) & $(ab + a)\cdot (\ONE + b)$\\
  2) & $(aa + a)^*$\\
\end{tabular}
\end{center}

there are several values for how these regular expressions can
recognise the strings (for 1) $ab$ and (for 2) $aaa$. Give in each case
\emph{all} the values and indicate which one is the POSIX value.
  

\item If a regular expression $r$ does not contain any occurrence of $\ZERO$,  
is it possible for $L(r)$ to be empty? Explain why, or give a proof.

\item Define the tokens and regular expressions for a language
  consisting of numbers, left-parenthesis $($, right-parenthesis $)$,
  identifiers and the operations $+$, $-$ and $*$. Can the following
  strings in this language be lexed?

  \begin{itemize}
  \item $(a + 3) * b$
  \item $)()++ -33$
  \item $(a / 3) * 3$
  \end{itemize}

  In case they can, can you give the corresponding token
  sequences.

\item Assume $r$ is nullable. Show that
  \[ 1 + r + r\cdot r \;\equiv\; r\cdot r
  \]

  holds.

\item \textbf{(Deleted)} Assume that $s^{-1}$ stands for the operation of reversing a
  string $s$. Given the following \emph{reversing} function on regular
  expressions

  \begin{center}
    \begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l}
      $rev(\ZERO)$   & $\dn$ & $\ZERO$\\
      $rev(\ONE)$         & $\dn$ & $\ONE$\\
      $rev(c)$                      & $\dn$ & $c$\\
      $rev(r_1 + r_2)$        & $\dn$ & $rev(r_1) + rev(r_2)$\\
      $rev(r_1 \cdot r_2)$  & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\
      $rev(r^*)$                   & $\dn$ & $rev(r)^*$\\
    \end{tabular}
  \end{center}

  and the set

  \begin{center}
    $Rev\,A \dn \{s^{-1} \;|\; s \in A\}$
  \end{center}

  prove whether

  \begin{center}
    $L(rev(r)) = Rev (L(r))$
  \end{center}

  holds.

\item Assume the delimiters for comments are
      \texttt{$\slash$*} and \texttt{*$\slash$}. Give a
      regular expression that can recognise comments of the
      form

  \begin{center}
    \texttt{$\slash$*~\ldots{}~*$\slash$} 
  \end{center}

      where the three dots stand for arbitrary characters, but
      not comment delimiters. (Hint: You can assume you are
      already given a regular expression written \texttt{ALL},
      that can recognise any character, and a regular
      expression \texttt{NOT} that recognises the complement
      of a regular expression.)
  
\item Simplify the regular expression

\[
(\ZERO \cdot (b \cdot c)) + 
((\ZERO \cdot c) + \ONE)
\]

      Does simplification always preserve the meaning of a
      regular expression? 
     
\item The Sulzmann \& Lu algorithm contains the function
      $mkeps$ which answers how a regular expression can match
      the empty string. What is the answer of $mkeps$ for the
      regular expressions:    

  \[
  \begin{array}{l}
  (\ZERO \cdot (b \cdot c)) + 
  ((\ZERO \cdot c) + \ONE)\\
    (a + \ONE) \cdot (\ONE + \ONE)\\
    a^*
  \end{array}
  \]

\item What is the purpose of the record regular expression in
      the Sulzmann \& Lu algorithm?

\item Recall the functions \textit{nullable} and
      \textit{zeroable}.  Define recursive functions
      \textit{atmostempty} (for regular expressions that match no
      string or only the empty string), \textit{somechars} (for
      regular expressions that match some non-empty string),
      \textit{infinitestrings} (for regular expressions that can match
      infinitely many strings).
      
\item There are two kinds of automata that are generated for 
  regular expression matching---DFAs and NFAs. (1) Regular expression engines like
  the one in Python generate NFAs.  Explain what is the problem with such
  NFAs and what is the reason why they use NFAs. (2) Regular expression
  engines like the one in Rust generate DFAs. Explain what is the
  problem with these regex engines and also what is the problem with $a^{\{1000\}}$
  in these engines.
      
%\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as
%argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so 
%that it filters out all comments and whitespace from the result.

%\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it
%implements the \texttt{findAll} function. This function takes a regular
%expressions and a string, and returns all substrings in this string that 
      %match the regular expression.

\item \POSTSCRIPT  
\end{enumerate}


\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End:
author	Christian Urban <christian.urban@kcl.ac.uk>
	Fri, 21 Oct 2022 13:30:12 +0100
changeset 892	f4df090a84d0
parent 843	97b622202547
child 893	54a483a33763
permissions	-rw-r--r--