hws/hw03.tex
author Christian Urban <christian.urban@kcl.ac.uk>
Thu, 05 Oct 2023 14:36:54 +0100
changeset 940 46eee459a999
parent 937 dc5ab66b11cc
child 941 66adcae6c762
permissions -rw-r--r--
updated
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
23
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     1
\documentclass{article}
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
     2
\usepackage{../style}
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
     3
\usepackage{../graphics}
23
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     4
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     5
\begin{document}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     6
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     7
\section*{Homework 3}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
     8
916
10f834eb0a9e texupdate
Christian Urban <christian.urban@kcl.ac.uk>
parents: 892
diff changeset
     9
%\HEADER
347
22b5294daa2a updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 294
diff changeset
    10
23
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    11
\begin{enumerate}
647
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    12
\item The regular expression matchers in Java, Python and Ruby can be
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    13
  very slow with some (basic) regular expressions. What is the main
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    14
  reason for this inefficient computation?
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    15
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    16
  \solution{Many matchers employ depth-first search (DFS) algorithms to check
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    17
    if a string is matched by the regex or not. Such algorithms
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    18
    require backtracking if they have gone down the wrong path, which
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    19
    can be very slow. There are also problems with bounded regular
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    20
  expressions and backreferences.}
647
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    21
  
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    22
\item What is a regular language? Are there alternative ways
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    23
      to define this notion? If yes, give an explanation why
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    24
      they define the same notion.
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    25
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    26
      \solution{A regular language is a language for which there is a
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    27
        regular expression matching exactly its strings. Another definition is
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    28
        that it is a language for which a finite automaton can be
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    29
        constructed. Both define the same set of languages.}   
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    30
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    31
\item Why is every finite set of strings a regular language?
132
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 102
diff changeset
    32
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    33
  \solution{Take the regular expression that is the alternative ($+$) of all the strings in the set; this is a finite expression because the set is finite.}
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    34
  
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    35
\item Assume you have an alphabet consisting of the letters
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    36
      $a$, $b$ and $c$ only. (1) Find a regular expression
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    37
      that recognises the two strings $ab$ and $ac$. (2) Find
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    38
      a regular expression that matches all strings
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    39
      \emph{except} these two strings. Note, you can only use
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    40
      regular expressions of the form
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 146
diff changeset
    41
      
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    42
  \begin{center} $r ::=
401
5d85dc9779b1 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 355
diff changeset
    43
    \ZERO \;|\; \ONE \;|\; c \;|\; r_1 + r_2 \;|\;
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    44
    r_1 \cdot r_2 \;|\; r^*$ 
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    45
  \end{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    46
647
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    47
%\item Define the function \textit{zeroable} which takes a
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    48
%      regular expression as argument and returns a boolean.
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    49
%      The function should satisfy the following property:
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    50
%
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    51
%  \begin{center}
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    52
%    $\textit{zeroable(r)} \;\text{if and only if}\; 
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    53
%    L(r) = \{\}$
180600c04da2 updated
Christian Urban <urbanc@in.tum.de>
parents: 577
diff changeset
    54
%  \end{center}
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    55
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    56
  \solution{Done in the video but there I forgot to include the empty string.}
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    57
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    58
\item Consider the alphabet $\{a,b\}$. Draw the automaton that has two
517
edab48a5b37e updated
cu
parents: 444
diff changeset
    59
  states, say $Q_0$ and $Q_1$.  The starting state is $Q_0$ and the
edab48a5b37e updated
cu
parents: 444
diff changeset
    60
  final state is $Q_1$. The transition function is given by
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 146
diff changeset
    61
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    62
  \begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    63
    \begin{tabular}{l}
517
edab48a5b37e updated
cu
parents: 444
diff changeset
    64
      $(Q_0, a) \rightarrow Q_0$\\
edab48a5b37e updated
cu
parents: 444
diff changeset
    65
      $(Q_0, b) \rightarrow Q_1$\\
edab48a5b37e updated
cu
parents: 444
diff changeset
    66
      $(Q_1, b) \rightarrow Q_1$
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    67
    \end{tabular}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    68
  \end{center}
258
1e4da6d2490c updated programs
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 146
diff changeset
    69
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    70
  What is the language recognised by this automaton?
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    71
937
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    72
  
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    73
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
    74
\item Give a non-deterministic finite automaton that can
937
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    75
  recognise the language $L(a\cdot (a + b)^* \cdot c)$.
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    76
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    77
  \solution{It is already possible to just read off the automaton without
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
    78
  going through Thompson's construction.}
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
    79
517
edab48a5b37e updated
cu
parents: 444
diff changeset
    80
\item Given a deterministic finite automaton $A(\varSigma, Q, Q_0, F,
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
    81
      \delta)$, define which language is recognised by this
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
    82
      automaton. Can you define also the language defined by a
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
    83
      non-deterministic automaton?
23
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
    84
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    85
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    86
      \solution{
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    87
        A formula for DFAs is
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    88
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    89
        \[L(A) \dn \{s \;|\; \hat{\delta}(Q_0, s) \in F\}\]
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    90
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    91
        For NFAs you need to first define what $\hat{\rho}$ means. If
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    92
        $\rho$ is given as a relation, you can define:
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    93
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    94
        \[
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    95
          \hat{\rho}(qs, []) \dn qs \qquad
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    96
          \hat{\rho}(qs, c::s) \dn \hat{\rho}\Bigl(\bigcup_{q\in qs} \{ q' \; | \; \rho(q, c, q')\},\; s\Bigr)
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    97
        \]
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    98
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
    99
        This ``collects'' all the states reachable in a breadth-first
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   100
        manner. Once you have all the states reachable by an NFA, you can define
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   101
        the language as
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   102
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   103
        \[
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   104
        L(N) \dn \{s \;|\; \hat{\rho}(qs_{start}, s) \cap F \not= \emptyset\}
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   105
        \]  
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   106
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   107
        Here you test whether the set of all states reachable (for $s$) contains at least
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   108
        a single accepting state.
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   109
        
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   110
      }
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   111
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   112
\item Given the following deterministic finite automaton over
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   113
      the alphabet $\{a, b\}$, find an automaton that
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   114
      recognises the complement language. (Hint: Recall that
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   115
      for the algorithm from the lectures, the automaton needs
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   116
      to be in completed form, that is have a transition for
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   117
      every letter from the alphabet.)
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
   118
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   119
      \solution{
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   120
        Before exchanging accepting and non-accepting states, it is important that
940
46eee459a999 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 937
diff changeset
   121
        the automaton is completed (meaning has a transition for every letter
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   122
        of the alphabet). If not completed, you have to introduce a sink state.
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   123
940
46eee459a999 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 937
diff changeset
   124
        For fun you can try out the example without
46eee459a999 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 937
diff changeset
   125
        completion: Then the original automaton can recognise
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   126
        strings of the form $a$, $ab\ldots b$; but the ``uncompleted'' automaton would
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   127
        recognise only the empty string.
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   128
      }
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   129
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   130
  \begin{center}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   131
    \begin{tikzpicture}[>=stealth',very thick,auto,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   132
                        every state/.style={minimum size=0pt,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   133
                        inner sep=2pt,draw=blue!50,very thick,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   134
                        fill=blue!20},scale=2]
517
edab48a5b37e updated
cu
parents: 444
diff changeset
   135
      \node[state, initial]        (q0) at ( 0,1) {$Q_0$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   136
      \node[state, accepting]  (q1) at ( 1,1) {$Q_1$};
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   137
      \path[->] (q0) edge node[above] {$a$} (q1)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   138
                (q1) edge [loop right] node {$b$} ();
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   139
    \end{tikzpicture}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   140
  \end{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   141
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
   142
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
   143
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   144
%\item Given the following deterministic finite automaton
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   145
%
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   146
%\begin{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   147
%\begin{tikzpicture}[scale=3, line width=0.7mm]
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   148
%  \node[state, initial]        (q0) at ( 0,1) {$q_0$};
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   149
%  \node[state,accepting]  (q1) at ( 1,1) {$q_1$};
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   150
%  \node[state, accepting] (q2) at ( 2,1) {$q_2$};
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   151
%  \path[->] (q0) edge node[above] {$b$} (q1)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   152
%                  (q1) edge [loop above] node[above] {$a$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   153
%                  (q2) edge [loop above] node[above] {$a, b$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   154
%                  (q1) edge node[above] {$b$} (q2)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   155
%                  (q0) edge[bend right] node[below] {$a$} (q2)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   156
%                  ;
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   157
%\end{tikzpicture}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   158
%\end{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   159
%find the corresponding minimal automaton. State clearly which nodes
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   160
%can be merged.
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents: 30
diff changeset
   161
355
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   162
\item Given the following non-deterministic finite automaton
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   163
      over the alphabet $\{a, b\}$, find a deterministic
a259eec25156 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 347
diff changeset
   164
      finite automaton that recognises the same language:
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   165
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   166
  \begin{center}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   167
    \begin{tikzpicture}[>=stealth',very thick,auto,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   168
                        every state/.style={minimum size=0pt,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   169
                        inner sep=2pt,draw=blue!50,very thick,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   170
                        fill=blue!20},scale=2]
517
edab48a5b37e updated
cu
parents: 444
diff changeset
   171
      \node[state, initial]        (q0) at ( 0,1) {$Q_0$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   172
      \node[state]                    (q1) at ( 1,1) {$Q_1$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   173
      \node[state, accepting] (q2) at ( 2,1) {$Q_2$};
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   174
      \path[->] (q0) edge node[above] {$a$} (q1)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   175
                (q0) edge [loop above] node[above] {$b$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   176
                (q0) edge [loop below] node[below] {$a$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   177
                (q1) edge node[above] {$a$} (q2);
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   178
    \end{tikzpicture}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   179
  \end{center}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   180
778
3e5f5d19f514 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 770
diff changeset
   181
\item %%\textbf{(Deleted for 2017, 2018, 2019)}
517
edab48a5b37e updated
cu
parents: 444
diff changeset
   182
  Given the following deterministic finite automaton over the
271
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   183
  alphabet $\{0, 1\}$, find the corresponding minimal automaton. In
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   184
  case states can be merged, state clearly which states can be merged.
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   185
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   186
  \begin{center}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   187
    \begin{tikzpicture}[>=stealth',very thick,auto,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   188
                        every state/.style={minimum size=0pt,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   189
                        inner sep=2pt,draw=blue!50,very thick,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   190
                        fill=blue!20},scale=2]
517
edab48a5b37e updated
cu
parents: 444
diff changeset
   191
      \node[state, initial]        (q0) at ( 0,1) {$Q_0$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   192
      \node[state]                    (q1) at ( 1,1) {$Q_1$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   193
      \node[state, accepting] (q4) at ( 2,1) {$Q_4$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   194
      \node[state]                    (q2) at (0.5,0) {$Q_2$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   195
      \node[state]                    (q3) at (1.5,0) {$Q_3$};
271
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   196
      \path[->] (q0) edge node[above] {$0$} (q1)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   197
                (q0) edge node[right] {$1$} (q2)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   198
                (q1) edge node[above] {$0$} (q4)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   199
                (q1) edge node[right] {$1$} (q2)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   200
                (q2) edge node[above] {$0$} (q3)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   201
                (q2) edge [loop below] node {$1$} ()
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   202
                (q3) edge node[left] {$0$} (q4)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   203
                (q3) edge [bend left=95, looseness = 2.2] node [left=2mm] {$1$} (q0)
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   204
                (q4) edge [loop right] node {$0, 1$} ();
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   205
    \end{tikzpicture}
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   206
  \end{center}
b9b54574ee41 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 267
diff changeset
   207
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   208
  \solution{$Q_0$ and $Q_2$ can be merged, and $Q_1$ and $Q_3$ as well.}
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   209
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   210
\item Given the following deterministic finite automaton over the alphabet $\{a, b\}$:
264
4deef8ac5d72 uodated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 258
diff changeset
   211
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   212
  \begin{center}
292
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   213
    \begin{tikzpicture}[scale=2,>=stealth',very thick,auto,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   214
                        every state/.style={minimum size=0pt,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   215
                        inner sep=2pt,draw=blue!50,very thick,
7ed2a25dd115 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 271
diff changeset
   216
                        fill=blue!20}]
517
edab48a5b37e updated
cu
parents: 444
diff changeset
   217
      \node[state, initial, accepting]        (q0) at ( 0,1) {$Q_0$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   218
      \node[state, accepting]                    (q1) at ( 1,1) {$Q_1$};
edab48a5b37e updated
cu
parents: 444
diff changeset
   219
      \node[state] (q2) at ( 2,1) {$Q_2$};
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   220
      \path[->] (q0) edge[bend left] node[above] {$a$} (q1)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   221
                (q1) edge[bend left] node[above] {$b$} (q0)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   222
                (q2) edge[bend left=50] node[below] {$b$} (q0)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   223
                (q1) edge node[above] {$a$} (q2)
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   224
                (q2) edge [loop right] node {$a$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   225
                (q0) edge [loop below] node {$b$} ()
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   226
            ;
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   227
    \end{tikzpicture}
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   228
  \end{center}
31
e22ba348b209 added hw04
Christian Urban <urbanc@in.tum.de>
parents: 30
diff changeset
   229
267
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   230
  Give a regular expression that can recognise the same language as
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   231
  this automaton. (Hint: If you use Brzozowski's method, you can assume
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   232
  Arden's lemma which states that an equation of the form $q = q\cdot r + s$
a1544b804d1e updated homeworks
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 264
diff changeset
   233
  has the unique solution $q = s \cdot r^*$.)
294
c29853b672fb updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 292
diff changeset
   234
c29853b672fb updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 292
diff changeset
   235
\item Suppose a non-deterministic finite automaton (NFA) has
770
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   236
  $n$ states. How many states does a deterministic 
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   237
  automaton (DFA) that can recognise the same language
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   238
  as the NFA need at most?
294
c29853b672fb updated hws
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 292
diff changeset
   239
937
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   240
  \solution{$2^n$ in the worst case, and for some regexes the worst case
892
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   241
    cannot be avoided. 
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   242
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   243
    Other comments: $r^{\{n\}}$ can only be represented as $n$
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   244
    copies of the automaton for $r$, which can explode the automaton for bounded
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   245
    regular expressions. Similarly, we have no idea how backreferences can be
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   246
    represented as an automaton.
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   247
  }
f4df090a84d0 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 778
diff changeset
   248
937
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   249
\item Rust implements a non-backtracking regular expression matcher
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   250
  based on the classic idea of DFAs. Still, some regular expressions
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   251
  take a surprising amount of time for matching problems. Explain the
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   252
  problem.
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   253
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   254
  \solution{The problem has to do with bounded regular expressions,
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   255
    such as $r^{\{n\}}$. They are represented as $n$ copies of some
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   256
    automaton for $r$. If $n$ is large, then this can result in a
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   257
    large memory-footprint and slow runtime.}
dc5ab66b11cc updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 916
diff changeset
   258
770
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   259
\item Prove that for all regular expressions $r$ we have
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   260
      
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   261
\begin{center} 
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   262
  $\textit{nullable}(r) \quad \text{if and only if} 
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   263
  \quad [] \in L(r)$ 
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   264
\end{center}
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   265
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   266
      Write down clearly in each case what you need to prove
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   267
      and what are the assumptions. 
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   268
c563cf946497 updated
Christian Urban <christian.urban@kcl.ac.uk>
parents: 652
diff changeset
   269
  
444
3056a4c071b0 updated
Christian Urban <christian dot urban at kcl dot ac dot uk>
parents: 401
diff changeset
   270
\item \POSTSCRIPT  
23
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   271
\end{enumerate}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   272
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   273
\end{document}
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   274
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   275
%%% Local Variables: 
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   276
%%% mode: latex
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   277
%%% TeX-master: t
Christian Urban <urbanc@in.tum.de>
parents:
diff changeset
   278
%%% End: