afl-material: hws/hw05.tex@dc5ab66b11cc


\documentclass{article}
\usepackage{../style}
\usepackage{../graphics}
\usepackage{../grammar}

\begin{document}

% explain what is a context-free grammar and the language it generates 
%


\section*{Homework 5}

\HEADER


\begin{enumerate}
\item Consider the basic regular expressions

\begin{center}
$r ::= \ZERO \;|\; \ONE \;|\; c  \;|\; r_1 + r_2  \;|\; r_1 \cdot r_2 \;|\; r^*$
\end{center}

      and suppose you want to show a property $P(r)$ for all
      regular expressions $r$ by structural induction. Write
      down which cases do you need to analyse. State clearly
      the induction hypotheses if applicable in a case.

\item Define a regular expression, written $ALL$, that can
      match every string. This definition should be in terms
      of the following extended regular expressions:

\begin{center}
$r ::= \ZERO \;|\; 
       \ONE \;|\;  
       c  \;|\; 
       r_1 + r_2 \;|\; 
       r_1 \cdot r_2 \;|\; 
       r^* \;|\;
       \sim r$
\end{center}

%\item Assume the delimiters for comments are \texttt{$\slash$*}
%and \texttt{*$\slash$}. Give a regular expression that can
%recognise comments of the form
%
%\begin{center}
%\texttt{$\slash$*~\ldots{}~*$\slash$} 
%\end{center}
%
%where the three dots stand for arbitrary characters, but not
%comment delimiters.

\item Define the following regular expressions 

\begin{center}
\begin{tabular}{ll}
$r^+$ & (one or more matches)\\
$r^?$   & (zero or one match)\\
$r^{\{n\}}$ & (exactly $n$ matches)\\
$r^{\{m, n\}}$ & (at least $m$ and maximal $n$ matches, with the\\
&  \phantom{(}assumption $m \le n$)\\
\end{tabular}
\end{center}

in terms of the usual basic regular expressions

\begin{center}
$r ::= \ZERO \;|\; \ONE \;|\; c  \;|\; r_1 + r_2  \;|\; r_1 \cdot r_2 \;|\; r^*$
\end{center}

\item Give the regular expressions for lexing a language
      consisting of identifiers, left-parenthesis \texttt{(},
      right-parenthesis \texttt{)}, numbers that can be either
      positive or negative, and the operations \texttt{+},
      \texttt{-} and \texttt{*}. 

      Decide whether the following strings can be lexed in
      this language?

\begin{enumerate}
\item \texttt{"(a3+3)*b"}
\item \texttt{")()++-33"}
\item \texttt{"(b42/3)*3"}
\end{enumerate}

In case they can, give the corresponding token sequences. (Hint: 
Observe the maximal munch rule and the priorities of your regular
expressions that make the process of lexing unambiguous.)

\item Suppose the following context-free grammar $G$
  
\begin{plstx}[margin=1cm]
  : \meta{S\/} ::= \meta{A\/}\cdot\meta{S\/}\cdot\meta{B\/} \;\mid\;
                   \meta{B\/}\cdot\meta{S\/}\cdot\meta{A\/} \;\mid\; \epsilon\\
  : \meta{A\/} ::= a \mid \epsilon\\
  : \meta{B\/} ::= b\\
\end{plstx}

where the starting symbol is $\meta{S}$.
Which of the following strings are in the language of $G$?

\begin{itemize}
\item[$\bullet$] $a$
\item[$\bullet$] $b$
\item[$\bullet$] $ab$
\item[$\bullet$] $ba$
\item[$\bullet$] $bb$
\item[$\bullet$] $baa$
\end{itemize}

\item Suppose the following context-free grammar 
  
  \begin{plstx}[margin=1cm]
  : \meta{S\/} ::= a\cdot \meta{S\/}\cdot a\;\mid\;
                   b\cdot \meta{S\/}\cdot b\;\mid\; \epsilon\\
  \end{plstx}

Describe which language is generated by this grammar.
  
\item Remember we have specified identifiers with regular expressions as
  strings that start with a letter followed by letters, digits and
  underscores. This can also be specified by a grammar rule or rules.
  What would the rule(s) look like for identifiers? 

  \solution{
  \begin{plstx}[margin=1cm]
  : \meta{Id\/} ::= \meta{Let\/}\cdot \meta{R}\\
  : \meta{Let\/} ::= a \;\mid\; \dots \;\mid\; z\\
  : \meta{Dig\/} ::= 0 \;\mid\; \dots \;\mid\; 9\\
  : \meta{R\/} ::= \meta{Let\/} \cdot \meta{R\/} \;\mid\;
                  \meta{Dig\/} \cdot \meta{R\/} \;\mid\;
                  $\_$ \cdot \meta{R\/} \;\mid\; \epsilon\\
  \end{plstx}  
}

\item If we specify keywords, identifiers (see above) and programs
  by grammar rules, are there any problems you need to be careful
  about when using a parser for identifying tokens?

  \solution{Parsers do not have the POSIX rules (e.g.~longest munch
    rule) built in. I am not aware that any parser does this out of
    the box and you would need to build in such constraints into the
    grammar rules or parsing mechanism.}

\item {\bf(Optional)} Recall the definitions for $Der$ and $der$
      from the lectures. Prove by induction on $r$ the
      property that 

\[
L(der\,c\,r) = Der\,c\,(L(r))
\]

      holds.

\item \POSTSCRIPT        
\end{enumerate}

\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End:
author	Christian Urban <christian.urban@kcl.ac.uk>
	Tue, 03 Oct 2023 14:29:12 +0100
changeset 937	dc5ab66b11cc
parent 619	136517d67d40
child 953	5e070fb0332a
permissions	-rw-r--r--