hws/hw03.tex
changeset 892 f4df090a84d0
parent 778 3e5f5d19f514
child 916 10f834eb0a9e
--- a/hws/hw03.tex	Fri Oct 14 12:16:07 2022 +0100
+++ b/hws/hw03.tex	Fri Oct 21 13:30:12 2022 +0100
@@ -2,6 +2,14 @@
 \usepackage{../style}
 \usepackage{../graphics}
 
+\newcommand{\solution}[1]{%
+  \begin{quote}\sf%
+    #1%
+  \end{quote}}
+\renewcommand{\solution}[1]{}
+
+
+
 \begin{document}
 
 \section*{Homework 3}
@@ -12,13 +20,26 @@
 \item The regular expression matchers in Java, Python and Ruby can be
   very slow with some (basic) regular expressions. What is the main
   reason for this inefficient computation?
+
+  \solution{Many matchers employ DFS type of algorithms to check
+    if a string is matched by the regex or not. Such algorithms
+    require backtracking if have gone down the wrong path which
+    can be very slow. There are also problems with bounded regular
+  expressions and backreferences.}
   
 \item What is a regular language? Are there alternative ways
       to define this notion? If yes, give an explanation why
       they define the same notion.
 
+      \solution{A regular language is a language for which every string
+        can be recognized by some regular expression. Another definition is
+        that it is a language for which a finite automaton can be
+        constructed. Both define the same set of languages.}   
+
 \item Why is every finite set of strings a regular language?
 
+  \solution{Take a regex composed of all strings (works for finite languages)}
+  
 \item Assume you have an alphabet consisting of the letters
       $a$, $b$ and $c$ only. (1) Find a regular expression
       that recognises the two strings $ab$ and $ac$. (2) Find
@@ -40,6 +61,8 @@
 %    L(r) = \{\}$
 %  \end{center}
 
+  \solution{Done in the video but there I forgot to include the empty string.}
+
 \item Given the alphabet $\{a,b\}$. Draw the automaton that has two
   states, say $Q_0$ and $Q_1$.  The starting state is $Q_0$ and the
   final state is $Q_1$. The transition function is given by
@@ -62,6 +85,33 @@
       automaton. Can you define also the language defined by a
       non-deterministic automaton?
 
+
+      \solution{
+        A formula for DFAs is
+
+        \[L(A) \dn \{s \;|\; \hat{\delta}(start_q, s) \in F\}\]
+
+        For NFAs you need to first define what $\hat{\rho}$ means. If
+        $\rho$ is given as a relation, you can define:
+
+        \[
+          \hat{\rho}(qs, []) \dn qs \qquad
+          \hat{\rho}(qs, c::s) \dn \bigcup_{q\in qs} \{ q' \; | \; \rho(q, c, q')\}
+        \]
+
+        This ``collects'' all the states reachable in a breadth-first
+        manner. Once you have all the states reachable by an NFA, you can define
+        the language as
+
+        \[
+        L(N) \dn \{s \;|\; \hat{\rho}(qs_{start}, s) \cap F \not= \emptyset\}
+        \]  
+
+        Here you test whether the all states reachable (for $s$) contain at least
+        a single accepting state.
+        
+      }
+
 \item Given the following deterministic finite automaton over
       the alphabet $\{a, b\}$, find an automaton that
       recognises the complement language. (Hint: Recall that
@@ -69,6 +119,17 @@
       to be in completed form, that is have a transition for
       every letter from the alphabet.)
 
+      \solution{
+        Before exchanging accepting and non-accepting states, it is important that
+        the automaton is completed (meamning has a transition for every letter
+        of the alphabet). If not completed, you have to introduce a sink state.
+
+        For fun you can try out the example with
+        out completion: Then the original automaton can recognise
+        strings of the form $a$, $ab...b$; but the ``uncompleted'' automaton would
+        recognise only the empty string.
+      }
+
   \begin{center}
     \begin{tikzpicture}[>=stealth',very thick,auto,
                         every state/.style={minimum size=0pt,
@@ -147,6 +208,8 @@
     \end{tikzpicture}
   \end{center}
 
+  \solution{Q0 and Q2 can be merged; and Q1 and Q3 as well}
+
 \item Given the following finite deterministic automaton over the alphabet $\{a, b\}$:
 
   \begin{center}
@@ -177,6 +240,15 @@
   automaton (DFA) that can recognise the same language
   as the NFA maximal need?
 
+  \solution{ $2^n$ in the worst-case and for some regexes the worst case
+    cannot be avoided. 
+
+    Other comments: $r^{\{n\}}$ can only be represented as $n$
+    copies of the automaton for $r$, which can explode the automaton for bounded
+    regular expressions. Similarly, we have no idea how backreferences can be
+    represented as automaton.
+  }
+
 \item Prove that for all regular expressions $r$ we have
       
 \begin{center}