hws/hw04.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 28 Oct 2022 09:08:13 +0100
changeset 893 54a483a33763
parent 892 f4df090a84d0
child 916 10f834eb0a9e
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
\documentclass{article}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
     2
\usepackage{../style}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
     3
\usepackage{../graphics}
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
     4
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     5
\newcommand{\solution}[1]{%
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     6
  \begin{quote}\sf%
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     7
    #1%
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     8
  \end{quote}}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     9
\renewcommand{\solution}[1]{}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    10
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    11
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    12
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    13
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    14
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    15
\begin{document}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    16
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    17
\section*{Homework 4}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    18
347
22b5294daa2a updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 294
diff changeset
    19
\HEADER
22b5294daa2a updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 294
diff changeset
    20
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    21
\begin{enumerate}
34
Christian Urban <urbanc@in.tum.de>
parents: 32
diff changeset
    22
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    23
\item Given the regular expressions
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    24
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    25
\begin{center}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    26
\begin{tabular}{ll}    
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    27
  1) & $(ab + a)\cdot (\ONE + b)$\\
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    28
  2) & $(aa + a)^*$\\
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    29
\end{tabular}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    30
\end{center}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    31
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    32
there are several values for how these regular expressions can
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    33
recognise the strings (for 1) $ab$ and (for 2) $aaa$. Give in each case
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    34
\emph{all} the values and indicate which one is the POSIX value.
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    35
  
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    36
444
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
    37
\item If a regular expression $r$ does not contain any occurrence of $\ZERO$,  
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    38
  is it possible for $L(r)$ to be empty? Explain why, or give a proof.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    39
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    40
  \solution{
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    41
    The property to prove is
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    42
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    43
    \begin{center}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    44
    $P(r)$: If $r$ does not contain $\ZERO$, then $L(r) \not= \emptyset$.  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    45
  \end{center}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    46
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    47
  For this you have to now go through all cases.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    48
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    49
  Case $r = 0$: $P(\ZERO)$ says: If $\ZERO$ does not contain $\ZERO$
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    50
  then \ldots. The premise is obviously false, so everything follows,
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    51
  in particular $L(r) \not= \emptyset$.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    52
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    53
  Case $r = \ONE$ and $r = c$ are similar, just that the premise is
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    54
  true, but also $L(\ONE)$ and $L(c)$ are not empty. So we shown
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    55
  $L(r) \not= \emptyset$.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    56
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    57
  Case $r = r_1 + r_2$: We know $P(r_1)$ and $P(r_2)$ as IHs. We need to show
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    58
  $P(r_1 + r_2)$: If $r_1 + r_2$ does not contain $\ZERO$, then $L(r_1 + r_2) \not= \emptyset$.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    59
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    60
  If $r_1 + r_2$ does not contain $\ZERO$, then also $r_1$ does not contain $\ZERO$
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    61
  and $r_2$ does not contain $\ZERO$. So we can apply the two IHs $P(r_1)$ and $P(r_2)$,
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    62
  which allow us to infer that $L(r_1) \not= \emptyset$ and $L(r_2) \not= \emptyset$.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    63
  But if this is the case, then also $L(r_1 + r_2) \not= \emptyset$, which is what we needed
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    64
  to show in this case.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    65
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    66
  The other cases are similar.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    67
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    68
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    69
  This lemma essentially says that for basic regular expressions, if
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    70
  they do not match anything at all, they must contain $\ZERO$(s)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    71
  somewhere.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    72
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    73
}
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
    74
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    75
\item Define the tokens and regular expressions for a language
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    76
  consisting of numbers, left-parenthesis $($, right-parenthesis $)$,
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    77
  identifiers and the operations $+$, $-$ and $*$. Can the following
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    78
  strings in this language be lexed?
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    79
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    80
  \begin{itemize}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    81
  \item $(a + 3) * b$
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    82
  \item $)()++ -33$
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    83
  \item $(a / 3) * 3$
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    84
  \end{itemize}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    85
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    86
  In case they can, can you give the corresponding token
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    87
  sequences.
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
    88
768
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    89
\item Assume $r$ is nullable. Show that
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    90
  \[ 1 + r + r\cdot r \;\equiv\; r\cdot r
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    91
  \]
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    92
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    93
  holds.
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
    94
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    95
  \solution{
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    96
    If $r$ is nullable, then $1 + r \equiv r$. With this you can replace
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    97
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    98
    \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    99
      (1 + r) + r\cdot r & \equiv  r + r\cdot r\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   100
                         & \equiv  r \cdot (1 + r)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   101
                         & \equiv  r \cdot r
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   102
    \end{align}  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   103
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   104
    where in (2) you pull out the ``factor'' $r$ (because $r_1 \cdot (r_2 + r_3) \equiv r_1 \cdot r_2 + r_1 \cdot r_3$).
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   105
  }
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   106
  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   107
768
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   108
\item \textbf{(Deleted)} Assume that $s^{-1}$ stands for the operation of reversing a
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   109
  string $s$. Given the following \emph{reversing} function on regular
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   110
  expressions
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
   111
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   112
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   113
    \begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l}
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   114
      $rev(\ZERO)$   & $\dn$ & $\ZERO$\\
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   115
      $rev(\ONE)$         & $\dn$ & $\ONE$\\
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   116
      $rev(c)$                      & $\dn$ & $c$\\
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   117
      $rev(r_1 + r_2)$        & $\dn$ & $rev(r_1) + rev(r_2)$\\
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   118
      $rev(r_1 \cdot r_2)$  & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   119
      $rev(r^*)$                   & $\dn$ & $rev(r)^*$\\
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   120
    \end{tabular}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   121
  \end{center}
34
Christian Urban <urbanc@in.tum.de>
parents: 32
diff changeset
   122
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   123
  and the set
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
   124
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   125
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   126
    $Rev\,A \dn \{s^{-1} \;|\; s \in A\}$
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   127
  \end{center}
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   128
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   129
  prove whether
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
   130
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   131
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   132
    $L(rev(r)) = Rev (L(r))$
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   133
  \end{center}
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   134
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   135
  holds.
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   136
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   137
\item Assume the delimiters for comments are
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   138
      \texttt{$\slash$*} and \texttt{*$\slash$}. Give a
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   139
      regular expression that can recognise comments of the
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   140
      form
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   141
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   142
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   143
    \texttt{$\slash$*~\ldots{}~*$\slash$} 
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   144
  \end{center}
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   145
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   146
      where the three dots stand for arbitrary characters, but
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   147
      not comment delimiters. (Hint: You can assume you are
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   148
      already given a regular expression written \texttt{ALL},
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   149
      that can recognise any character, and a regular
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   150
      expression \texttt{NOT} that recognises the complement
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   151
      of a regular expression.)
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   152
  
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   153
\item Simplify the regular expression
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   154
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   155
\[
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   156
(\ZERO \cdot (b \cdot c)) + 
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   157
((\ZERO \cdot c) + \ONE)
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   158
\]
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   159
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   160
      Does simplification always preserve the meaning of a
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   161
      regular expression? 
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   162
     
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   163
\item The Sulzmann \& Lu algorithm contains the function
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   164
      $mkeps$ which answers how a regular expression can match
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   165
      the empty string. What is the answer of $mkeps$ for the
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   166
      regular expressions:    
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   167
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   168
  \[
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   169
  \begin{array}{l}
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   170
  (\ZERO \cdot (b \cdot c)) + 
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   171
  ((\ZERO \cdot c) + \ONE)\\
577
7a437f1f689d updated
Christian Urban <urbanc@in.tum.de>
parents: 525
diff changeset
   172
    (a + \ONE) \cdot (\ONE + \ONE)\\
7a437f1f689d updated
Christian Urban <urbanc@in.tum.de>
parents: 525
diff changeset
   173
    a^*
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   174
  \end{array}
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   175
  \]
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   176
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   177
\item What is the purpose of the record regular expression in
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   178
      the Sulzmann \& Lu algorithm?
498
ea47c3b8f35f updated
Christian Urban <urbanc@in.tum.de>
parents: 444
diff changeset
   179
843
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   180
\item Recall the functions \textit{nullable} and
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   181
      \textit{zeroable}.  Define recursive functions
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   182
      \textit{atmostempty} (for regular expressions that match no
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   183
      string or only the empty string), \textit{somechars} (for
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   184
      regular expressions that match some non-empty string),
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   185
      \textit{infinitestrings} (for regular expressions that can match
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   186
      infinitely many strings).
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   187
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   188
      \solution{
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   189
        \textbf{zeroable}: The property is $z(r) \;\text{iff}\; L(r) = \emptyset$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   190
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   191
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   192
          z(\ZERO) &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   193
          z(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   194
          z(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   195
          z(r_1 + r_2) &\dn z(r_1) \wedge z(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   196
          z(r_1 \cdot r_2) &\dn z(r_1) \vee z(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   197
          z(r^*)  &\dn false
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   198
        \end{align}\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   199
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   200
        \textbf{atmostempty}: The property is ``either $L(r) = \emptyset$ or $L(r) = \{[]\}$'', which
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   201
        is more formally $a(r) \;\text{iff}\; L(r) \subseteq \{[]\}$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   202
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   203
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   204
          a(\ZERO) &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   205
          a(\ONE)  &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   206
          a(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   207
          a(r_1 + r_2) &\dn a(r_1) \wedge a(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   208
          a(r_1 \cdot r_2) &\dn z(r_1) \vee z(r_2) \vee (a(r_1) \wedge a(r_2))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   209
          a(r^*)  &\dn a(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   210
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   211
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   212
        For this it is good to remember the regex should either not
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   213
        match anything at all, or just the empty string.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   214
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   215
        \textbf{somechars}: The property is ``$L(r)$ must contain a string which is not the empty string'', which
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   216
        is more formally $s(r) \;\text{iff}\; \exists\,s. s \not= [] \wedge s \in L(r)$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   217
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   218
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   219
          s(\ZERO) &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   220
          s(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   221
          s(c)     &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   222
          s(r_1 + r_2) &\dn s(r_1) \vee s(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   223
          s(r_1 \cdot r_2) &\dn (\neg z(r_1) \wedge s(r_2)) \;\vee\; (\neg z(r_2) \wedge s(r_1))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   224
          s(r^*)  &\dn s(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   225
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   226
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   227
        Here the interesting case is $r_1 \cdot r_2$ where one essentially has to make sure
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   228
        that one of the regexes is not zeroable, because then the resulting regex cannot match any
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   229
        string.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   230
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   231
        \textbf{infinitestrings}: The property is
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   232
        $i(r) \;\text{iff}\; L(r)\;\text{is infinite}$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   233
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   234
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   235
          i(\ZERO) &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   236
          i(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   237
          i(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   238
          i(r_1 + r_2) &\dn i(r_1) \vee i(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   239
          i(r_1 \cdot r_2) &\dn (\neg z(r_1) \wedge i(r_2)) \;\vee\; (\neg z(r_2) \wedge i(r_1))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   240
          i(r^*)  &\dn \neg a(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   241
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   242
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   243
        Here the interesting bit is that as soon $r$ can match at least a single string, then $r^*$
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   244
        will match infinitely many strings.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   245
        }
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   246
498
ea47c3b8f35f updated
Christian Urban <urbanc@in.tum.de>
parents: 444
diff changeset
   247
      
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 843
diff changeset
   248
\item There are two kinds of automata that are generated for 
843
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   249
  regular expression matching---DFAs and NFAs. (1) Regular expression engines like
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   250
  the one in Python generate NFAs.  Explain what is the problem with such
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   251
  NFAs and what is the reason why they use NFAs. (2) Regular expression
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   252
  engines like the one in Rust generate DFAs. Explain what is the
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   253
  problem with these regex engines and also what is the problem with $a^{\{1000\}}$
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   254
  in these engines.
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   255
      
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   256
%\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   257
%argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so 
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   258
%that it filters out all comments and whitespace from the result.
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   259
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   260
%\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   261
%implements the \texttt{findAll} function. This function takes a regular
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   262
%expressions and a string, and returns all substrings in this string that 
444
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   263
      %match the regular expression.
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   264
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   265
\item \POSTSCRIPT  
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   266
\end{enumerate}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   267
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   268
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   269
\end{document}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   270
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   271
%%% Local Variables: 
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   272
%%% mode: latex
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   273
%%% TeX-master: t
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   274
%%% End: