hws/hw04.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Fri, 29 Nov 2024 18:59:32 +0000
changeset 976 e9eac62928f5
parent 943 5365ef60707e
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
\documentclass{article}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
     2
\usepackage{../style}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
     3
\usepackage{../graphics}
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
     4
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
\begin{document}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
\section*{Homework 4}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
916
10f834eb0a9e texupdate
Christian Urban <christian.urban@kcl.ac.uk>
parents: 893
diff changeset
     9
%%\HEADER
347
22b5294daa2a updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 294
diff changeset
    10
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
\begin{enumerate}
34
Christian Urban <urbanc@in.tum.de>
parents: 32
diff changeset
    12
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    13
\item Given the regular expressions
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    14
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    15
\begin{center}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    16
\begin{tabular}{ll}    
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    17
  1) & $(ab + a)\cdot (\ONE + b)$\\
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    18
  2) & $(aa + a)^*$\\
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    19
\end{tabular}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    20
\end{center}
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    21
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    22
there are several values for how these regular expressions can
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    23
recognise the strings (for 1) $ab$ and (for 2) $aaa$. Give in each case
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    24
\emph{all} the values and indicate which one is the POSIX value.
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    25
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    26
\solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    27
  1) There are only 2 values (writing $a$ for $Char(a)$ and so on)
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    28
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    29
  \begin{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    30
  \begin{tabular}{l}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    31
    $Sequ(Left(Sequ(a,b)),Left(Empty))$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    32
    $Sequ(Right(a),Left(b))$\\ 
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    33
  \end{tabular}    
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    34
  \end{center}
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    35
  
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    36
  The first is the POSIX value because of the preference for $Left$.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    37
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    38
  2) There are three ``main'' values, namely
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    39
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    40
  \begin{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    41
  \begin{tabular}{l}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    42
    $Stars\,[Left(Sequ(a,a)),Right(a)]$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    43
    $Stars\,[Right(a), Left(Sequ(a,a))]$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    44
    $Stars\,[Right(a), Right(a), Right(a)]$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    45
  \end{tabular}    
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    46
  \end{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    47
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    48
  Again the first one is the POSIX value, but if it just about all
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    49
  possible values, then there are in fact infinitely many values because
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    50
  the following 
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    51
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    52
  \begin{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    53
  \begin{tabular}{l}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    54
    $Stars\,[Left(Sequ(a,a)),Empty,Right(a)]$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    55
    $Stars\,[Left(Sequ(a,a)),Empty,Empty,Right(a)]$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    56
    $Stars\,[Left(Sequ(a,a)),Empty,Right(a), Empty]$, \ldots\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    57
  \end{tabular}    
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    58
  \end{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    59
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    60
  are also values for this regex and the string $aaa$.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    61
}  
726
fba480bbc9f7 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 577
diff changeset
    62
444
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
    63
\item If a regular expression $r$ does not contain any occurrence of $\ZERO$,  
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    64
  is it possible for $L(r)$ to be empty? Explain why, or give a proof.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    65
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    66
  \solution{
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
    67
    No. The property to prove by induction is
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    68
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    69
    \begin{center}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    70
    $P(r)$: If $r$ does not contain $\ZERO$, then $L(r) \not= \emptyset$.  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    71
  \end{center}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    72
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    73
  For this you have to now go through all cases.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    74
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    75
  Case $r = 0$: $P(\ZERO)$ says: If $\ZERO$ does not contain $\ZERO$
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    76
  then \ldots. The premise is obviously false, so everything follows,
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    77
  in particular $L(r) \not= \emptyset$.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    78
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    79
  Case $r = \ONE$ and $r = c$ are similar, just that the premise is
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    80
  true, but also $L(\ONE)$ and $L(c)$ are not empty. So we shown
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    81
  $L(r) \not= \emptyset$.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    82
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    83
  Case $r = r_1 + r_2$: We know $P(r_1)$ and $P(r_2)$ as IHs. We need to show
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    84
  $P(r_1 + r_2)$: If $r_1 + r_2$ does not contain $\ZERO$, then $L(r_1 + r_2) \not= \emptyset$.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    85
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    86
  If $r_1 + r_2$ does not contain $\ZERO$, then also $r_1$ does not contain $\ZERO$
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    87
  and $r_2$ does not contain $\ZERO$. So we can apply the two IHs $P(r_1)$ and $P(r_2)$,
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    88
  which allow us to infer that $L(r_1) \not= \emptyset$ and $L(r_2) \not= \emptyset$.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    89
  But if this is the case, then also $L(r_1 + r_2) \not= \emptyset$, which is what we needed
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    90
  to show in this case.\medskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    91
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    92
  The other cases are similar.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    93
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    94
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    95
  This lemma essentially says that for basic regular expressions, if
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    96
  they do not match anything at all, they must contain $\ZERO$(s)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    97
  somewhere.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    98
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
    99
}
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
   100
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   101
\item Define the tokens and regular expressions for a language
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   102
  consisting of numbers, left-parenthesis $($, right-parenthesis $)$,
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   103
  identifiers and the operations $+$, $-$ and $*$. Can the following
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   104
  strings in this language be lexed?
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   105
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   106
  \begin{itemize}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   107
  \item $(a + 3) * b$
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   108
  \item $)()++ -33$
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   109
  \item $(a / 3) * 3$
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   110
  \end{itemize}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   111
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   112
  In case they can, can you give the corresponding token
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   113
  sequences.
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 166
diff changeset
   114
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   115
  \solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   116
  The first 2 are lexibile. The 3 one contains $/$ which is not an operator.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   117
  }  
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   118
768
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   119
\item Assume $r$ is nullable. Show that
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   120
  \[ 1 + r + r\cdot r \;\equiv\; r\cdot r
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   121
  \]
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   122
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   123
  holds.
34f77b976b88 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 726
diff changeset
   124
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   125
  \solution{
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   126
    If $r$ is nullable, then $1 + r \equiv r$. With this you can replace
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   127
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   128
    \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   129
      (1 + r) + r\cdot r & \equiv  r + r\cdot r\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   130
                         & \equiv  r \cdot (1 + r)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   131
                         & \equiv  r \cdot r
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   132
    \end{align}  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   133
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   134
    where in (2) you pull out the ``factor'' $r$ (because $r_1 \cdot (r_2 + r_3) \equiv r_1 \cdot r_2 + r_1 \cdot r_3$).
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   135
  }
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   136
  
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   137
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   138
%\item \textbf{(Deleted)} Assume that $s^{-1}$ stands for the operation of reversing a
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   139
%  string $s$. Given the following \emph{reversing} function on regular
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   140
%  expressions
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   141
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   142
%  \begin{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   143
%    \begin{tabular}{r@{\hspace{1mm}}c@{\hspace{1mm}}l}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   144
%      $rev(\ZERO)$   & $\dn$ & $\ZERO$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   145
%      $rev(\ONE)$         & $\dn$ & $\ONE$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   146
%      $rev(c)$                      & $\dn$ & $c$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   147
%      $rev(r_1 + r_2)$        & $\dn$ & $rev(r_1) + rev(r_2)$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   148
%      $rev(r_1 \cdot r_2)$  & $\dn$ & $rev(r_2) \cdot rev(r_1)$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   149
%      $rev(r^*)$                   & $\dn$ & $rev(r)^*$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   150
%    \end{tabular}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   151
%  \end{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   152
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   153
%  and the set
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   154
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   155
%  \begin{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   156
%    $Rev\,A \dn \{s^{-1} \;|\; s \in A\}$
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   157
%  \end{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   158
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   159
%  prove whether
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   160
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   161
%  \begin{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   162
%    $L(rev(r)) = Rev (L(r))$
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   163
%  \end{center}
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   164
%
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   165
%  holds.
34
Christian Urban <urbanc@in.tum.de>
parents: 32
diff changeset
   166
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   167
\item Construct a regular expression that can validate passwords. A
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   168
  password should be at least 8 characters long and consist of upper-
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   169
  and lower-case letters and digits. It should contain at least a
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   170
  single lower-case letter, at least a single upper-case letter and at
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   171
  least a single digit. If possible use the intersection regular
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   172
  expression from CW1, written $\_\&\_$, and the bounded regular
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   173
  expressions; you can also assume a regular expression written
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   174
  \texttt{ALL} that can match any character.
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   175
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   176
  \solution{
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   177
    You can build-up the different constraints separately and then
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   178
    use the intersection operator:
32
d085fe0c086f started
Christian Urban <urbanc@in.tum.de>
parents: 31
diff changeset
   179
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   180
  \begin{center}  
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   181
  \begin{tabular}{lll}  
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   182
    $ALL^{\{8..\}}$ & \;\&\; & $(ALL^*\cdot [a-z]\cdot ALL^*)$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   183
                   & \;\&\; & $(ALL^*\cdot [A-Z]\cdot ALL^*)$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   184
                   & \;\&\; & $(ALL^*\cdot [0-9]\cdot ALL^*)$\\
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   185
  \end{tabular}
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   186
  \end{center}
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   187
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   188
  $ALL$ could be represented as $\sim \ZERO$.
939
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   189
  }
f85e784d3014 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   190
  
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   191
\item Assume the delimiters for comments are
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   192
      \texttt{$\slash$*} and \texttt{*$\slash$}. Give a
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   193
      regular expression that can recognise comments of the
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   194
      form
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   195
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   196
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   197
    \texttt{$\slash$*~\ldots{}~*$\slash$} 
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   198
  \end{center}
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   199
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   200
      where the three dots stand for arbitrary characters, but
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   201
      not comment delimiters. (Hint: You can assume you are
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   202
      already given a regular expression written \texttt{ALL},
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   203
      that can recognise any character, and a regular
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   204
      expression \texttt{NOT} that recognises the complement
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   205
      of a regular expression.)
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   206
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   207
      \solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   208
        \[/ * \sim (ALL^* * / ALL^*) * /\]
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   209
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   210
      The idea to make sure in between $/ *$ and $* /$ ar no strings that contain $* /$.  
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   211
      }
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   212
      
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   213
\item Simplify the regular expression
42
Christian Urban <urbanc@in.tum.de>
parents: 34
diff changeset
   214
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   215
\[
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   216
(\ZERO \cdot (b \cdot c)) + 
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   217
((\ZERO \cdot c) + \ONE)
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   218
\]
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   219
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   220
      Does simplification always preserve the meaning of a
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   221
      regular expression?
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   222
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   223
      \solution{ Yes, simplification preserves the language. It
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   224
        simplifies to just $\ONE$. It should be remembered that the
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   225
        Brzozowski does not simplify under stars. This does not apply
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   226
        in this example, though.  }
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   227
     
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   228
\item The Sulzmann \& Lu algorithm contains the function
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   229
      $mkeps$ which answers how a regular expression can match
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   230
      the empty string. What is the answer of $mkeps$ for the
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   231
      regular expressions:    
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   232
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   233
  \[
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   234
  \begin{array}{l}
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   235
  (\ZERO \cdot (b \cdot c)) + 
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   236
  ((\ZERO \cdot c) + \ONE)\\
577
7a437f1f689d updated
Christian Urban <urbanc@in.tum.de>
parents: 525
diff changeset
   237
    (a + \ONE) \cdot (\ONE + \ONE)\\
7a437f1f689d updated
Christian Urban <urbanc@in.tum.de>
parents: 525
diff changeset
   238
    a^*
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   239
  \end{array}
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   240
\]
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   241
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   242
\solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   243
  The values are
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   244
  \begin{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   245
  \begin{tabular}{l}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   246
    $Right(Right(Empty))$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   247
    $Sequ(Right(\ONE),Left(\ONE))$\\
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   248
    $Stars\,[]$
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   249
  \end{tabular}  
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   250
  \end{center}
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   251
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   252
  The last one uses the rule that $mkeps$ for the star returns always $Star\,[]$.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   253
  }
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   254
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 359
diff changeset
   255
\item What is the purpose of the record regular expression in
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   256
  the Sulzmann \& Lu algorithm?
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   257
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   258
  \solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   259
    It marks a part of a regular expression and can be used to extract the part of the
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   260
    string that is matched by this marked part of the regular expression.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   261
  }
498
ea47c3b8f35f updated
Christian Urban <urbanc@in.tum.de>
parents: 444
diff changeset
   262
843
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   263
\item Recall the functions \textit{nullable} and
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   264
      \textit{zeroable}.  Define recursive functions
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   265
      \textit{atmostempty} (for regular expressions that match no
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   266
      string or only the empty string), \textit{somechars} (for
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   267
      regular expressions that match some non-empty string),
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   268
      \textit{infinitestrings} (for regular expressions that can match
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   269
      infinitely many strings).
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   270
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   271
      \solution{
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   272
        \textbf{zeroable}: The property is $z(r) \;\text{iff}\; L(r) = \emptyset$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   273
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   274
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   275
          z(\ZERO) &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   276
          z(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   277
          z(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   278
          z(r_1 + r_2) &\dn z(r_1) \wedge z(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   279
          z(r_1 \cdot r_2) &\dn z(r_1) \vee z(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   280
          z(r^*)  &\dn false
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   281
        \end{align}\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   282
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   283
        \textbf{atmostempty}: The property is ``either $L(r) = \emptyset$ or $L(r) = \{[]\}$'', which
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   284
        is more formally $a(r) \;\text{iff}\; L(r) \subseteq \{[]\}$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   285
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   286
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   287
          a(\ZERO) &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   288
          a(\ONE)  &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   289
          a(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   290
          a(r_1 + r_2) &\dn a(r_1) \wedge a(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   291
          a(r_1 \cdot r_2) &\dn z(r_1) \vee z(r_2) \vee (a(r_1) \wedge a(r_2))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   292
          a(r^*)  &\dn a(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   293
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   294
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   295
        For this it is good to remember the regex should either not
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   296
        match anything at all, or just the empty string.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   297
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   298
        \textbf{somechars}: The property is ``$L(r)$ must contain a string which is not the empty string'', which
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   299
        is more formally $s(r) \;\text{iff}\; \exists\,s. s \not= [] \wedge s \in L(r)$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   300
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   301
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   302
          s(\ZERO) &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   303
          s(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   304
          s(c)     &\dn true\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   305
          s(r_1 + r_2) &\dn s(r_1) \vee s(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   306
          s(r_1 \cdot r_2) &\dn (\neg z(r_1) \wedge s(r_2)) \;\vee\; (\neg z(r_2) \wedge s(r_1))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   307
          s(r^*)  &\dn s(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   308
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   309
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   310
        Here the interesting case is $r_1 \cdot r_2$ where one essentially has to make sure
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   311
        that one of the regexes is not zeroable, because then the resulting regex cannot match any
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   312
        string.\bigskip
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   313
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   314
        \textbf{infinitestrings}: The property is
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   315
        $i(r) \;\text{iff}\; L(r)\;\text{is infinite}$:
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   316
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   317
        \begin{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   318
          i(\ZERO) &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   319
          i(\ONE)  &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   320
          i(c)     &\dn false\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   321
          i(r_1 + r_2) &\dn i(r_1) \vee i(r_2)\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   322
          i(r_1 \cdot r_2) &\dn (\neg z(r_1) \wedge i(r_2)) \;\vee\; (\neg z(r_2) \wedge i(r_1))\\
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   323
          i(r^*)  &\dn \neg a(r)
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   324
        \end{align}
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   325
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   326
        Here the interesting bit is that as soon $r$ can match at least a single non-empty string, then $r^*$
893
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   327
        will match infinitely many strings.
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   328
        }
54a483a33763 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
   329
498
ea47c3b8f35f updated
Christian Urban <urbanc@in.tum.de>
parents: 444
diff changeset
   330
      
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 843
diff changeset
   331
\item There are two kinds of automata that are generated for 
843
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   332
  regular expression matching---DFAs and NFAs. (1) Regular expression engines like
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   333
  the one in Python generate NFAs.  Explain what is the problem with such
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   334
  NFAs and what is the reason why they use NFAs. (2) Regular expression
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   335
  engines like the one in Rust generate DFAs. Explain what is the
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   336
  problem with these regex engines and also what is the problem with $a^{\{1000\}}$
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   337
  in these engines.
943
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   338
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   339
  \solution{
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   340
    Why they use NFAs? NFAs are of similar size as the regular expression (they do not explode
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   341
    for the basic regular expressions. Python regex library supports constructions like
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   342
    back-refernces which cannot be represented by DFAs (string matching with back-references
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   343
    can be NP. What is the problem with $a^{\{1000\}}$. When generating DFAs (and NFAs) for the
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   344
    bounded regular expressions, one has to make $n$ copies, which means their size can grow
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   345
    drastically for large counters.
5365ef60707e updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 939
diff changeset
   346
  }
843
97b622202547 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 768
diff changeset
   347
      
146
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   348
%\item (Optional) The tokenizer in \texttt{regexp3.scala} takes as
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   349
%argument a string and a list of rules. The result is a list of tokens. Improve this tokenizer so 
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   350
%that it filters out all comments and whitespace from the result.
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   351
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   352
%\item (Optional) Modify the tokenizer in \texttt{regexp2.scala} so that it
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   353
%implements the \texttt{findAll} function. This function takes a regular
9da175d5eb63 added new hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
   354
%expressions and a string, and returns all substrings in this string that 
444
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   355
      %match the regular expression.
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   356
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   357
\item \POSTSCRIPT  
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   358
\end{enumerate}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   359
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   360
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   361
\end{document}
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   362
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   363
%%% Local Variables: 
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   364
%%% mode: latex
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   365
%%% TeX-master: t
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   366
%%% End: