# HG changeset patch
# User Christian Urban <urbanc@in.tum.de>
# Date 1494122489 -3600
# Node ID e28d7a327870af644be9f8855f96c591504c0961
# Parent  598741d39d21dbfd21082673fc026c12a5ca3298
updated

diff -r 598741d39d21 -r e28d7a327870 handouts/ho03.pdf
Binary file handouts/ho03.pdf has changed
diff -r 598741d39d21 -r e28d7a327870 handouts/ho03.tex
--- a/handouts/ho03.tex	Sun May 07 00:20:58 2017 +0100
+++ b/handouts/ho03.tex	Sun May 07 03:01:29 2017 +0100
@@ -570,7 +570,7 @@
 \end{figure}
 
 The case for the sequence regular expression $r_1 \cdot r_2$ is a bit more
-complicated: We are given by recursion two NFAs representing the regular
+complicated: Say, we are given by recursion two NFAs representing the regular
 expressions $r_1$ and $r_2$ respectively.
 
 \begin{center}
@@ -605,10 +605,11 @@
 \end{center}
 
 \noindent The first NFA has some accepting states and the second some
-starting states. We obtain $\epsilon$NFA for $r_1\cdot r_2$ by
-connecting these accepting states with $\epsilon$-transitions to the
-starting states of the second automaton. By doing so we make them
-non-accepting like so:
+starting states. We obtain an $\epsilon$NFA for $r_1\cdot r_2$ by
+connecting the accepting states of the first NFA with
+$\epsilon$-transitions to the starting states of the second
+automaton. By doing so we make the accepting states of the first NFA
+to be non-accepting like so:
 
 \begin{center}
 \begin{tikzpicture}[node distance=3mm,
@@ -645,20 +646,37 @@
 \end{tikzpicture}
 \end{center}
 
-\noindent The case for the choice regular expression $r_1 +
-r_2$ is slightly different: We are given by recursion two
-automata representing $r_1$ and $r_2$ respectively. 
+\noindent The idea behind this construction is that the start of any
+string is first recognised by the first NFA, then we silently change
+to the second NFA; the ending of the string is recognised by the
+second NFA...just like matching of a string by the regular expression
+$r_1\cdot r_2$. The Scala code for this constrction is given in
+Figure~\ref{thompson2} in Lines 16--23. The starting states of the
+$\epsilon$NFA are the starting states of the first NFA (corresponding
+to $r_1$); the accepting function is the accepting function of the
+second NFA (corresponding to $r_2$). The new transition function is
+all the ``old'' transitions plus the $\epsilon$-transitions connecting
+the accepting states of the first NFA to the starting states of the
+first NFA (Lines 18 and 19). The $\epsilon$NFA is then immedately
+translated in a NFA.
+
+
+The case for the choice regular expression $r_1 + r_2$ is slightly
+different: We are given by recursion two NFAs representing $r_1$ and
+$r_2$ respectively.
 
 \begin{center}
 \begin{tikzpicture}[node distance=3mm,
     >=stealth',very thick,
     every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
 \node at (0,0)  (1)  {$\mbox{}$};
-\node[state, initial]  (2)  [above right=16mm of 1] {$\mbox{}$};
-\node[state, initial]  (3)  [below right=16mm of 1] {$\mbox{}$};
+\node  (2)  [above=10mm of 1] {};
+\node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
+\node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};
+\node[state, initial]  (3)  [below=10mm of 1] {$\mbox{}$};
 
-\node (a)  [right=of 2] {$\ldots$};
-\node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
+\node (a)  [right=of 2] {$\ldots\,$};
+\node  (a1)  [right=of a] {$$};
 \node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
 \node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
 
@@ -675,21 +693,24 @@
 \end{tikzpicture}
 \end{center}
 
-\noindent Each automaton has a single start state and
-potentially several accepting states. We obtain a NFA for the
-regular expression $r_1 + r_2$ by introducing a new starting
-state and connecting it with an $\epsilon$-transition to the
-two starting states above, like so
+\noindent Each NFA has some starting states and some accepting
+states. We obtain a NFA for the regular expression $r_1 + r_2$
+by composing the transition functions (this crucially depends
+on knowing that the states of each component NFA are distinct);
+and also combine the starting states and accepting functions:
 
 \begin{center}
-\hspace{2cm}\begin{tikzpicture}[node distance=3mm,
-                             >=stealth',very thick, every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
-\node at (0,0) [state, initial]  (1)  {$\mbox{}$};
-\node[state]  (2)  [above right=16mm of 1] {$\mbox{}$};
-\node[state]  (3)  [below right=16mm of 1] {$\mbox{}$};
+\begin{tikzpicture}[node distance=3mm,
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
+\node at (0,0) (1)  {$\mbox{}$};
+\node (2)  [above=10mm of 1] {$$};
+\node[state, initial]  (4)  [above=1mm of 2] {$\mbox{}$};
+\node[state, initial]  (5)  [below=1mm of 2] {$\mbox{}$};
+\node[state, initial]  (3)  [below=10mm of 1] {$\mbox{}$};
 
-\node (a)  [right=of 2] {$\ldots$};
-\node[state, accepting]  (a1)  [right=of a] {$\mbox{}$};
+\node (a)  [right=of 2] {$\ldots\,$};
+\node (a1)  [right=of a] {$$};
 \node[state, accepting]  (a2)  [above=of a1] {$\mbox{}$};
 \node[state, accepting]  (a3)  [below=of a1] {$\mbox{}$};
 
@@ -698,8 +719,8 @@
 \node[state, accepting]  (b2)  [above=of b1] {$\mbox{}$};
 \node[state, accepting]  (b3)  [below=of b1] {$\mbox{}$};
 
-\path[->] (1) edge node [above]  {$\epsilon$} (2);
-\path[->] (1) edge node [below]  {$\epsilon$} (3);
+%\path[->] (1) edge node [above]  {$\epsilon$} (2);
+%\path[->] (1) edge node [below]  {$\epsilon$} (3);
 
 \begin{pgfonlayer}{background}
 \node (3) [rounded corners, inner sep=1mm, thick, draw=black!60, fill=black!20, fit= (1) (a2) (a3) (b2) (b3)] {};
@@ -708,8 +729,8 @@
 \end{tikzpicture}
 \end{center}
 
-\noindent 
-Finally for the $*$-case we have an automaton for $r$
+\noindent The code for this construction is in Figure~\ref{thompson2}
+in Lines 25--33. Finally for the $*$-case we have a NFA for $r$
 
 \begin{center}
 \begin{tikzpicture}[node distance=3mm,
@@ -727,14 +748,16 @@
 \end{tikzpicture}
 \end{center}
 
-\noindent and connect its accepting states to a new starting
-state via $\epsilon$-transitions. This new starting state is
-also an accepting state, because $r^*$ can recognise the
-empty string. This gives the following automaton for $r^*$:
+\noindent and connect its accepting states to a new starting state via
+$\epsilon$-transitions. This new starting state is also an accepting
+state, because $r^*$ can recognise the empty string. This gives the
+following $\epsilon$NFA for $r^*$ (the corresponding code is in
+Figure~\ref{thompson2} in Lines 35--43:
 
 \begin{center}  
 \begin{tikzpicture}[node distance=3mm,
-                             >=stealth',very thick, every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
+    >=stealth',very thick,
+    every state/.style={minimum size=3pt,draw=blue!50,very thick,fill=blue!20},]
 \node at (0,0) [state, initial,accepting]  (1)  {$\mbox{}$};
 \node[state]  (2)  [right=16mm of 1] {$\mbox{}$};
 \node (a)  [right=of 2] {$\ldots$};
@@ -752,6 +775,15 @@
 \end{tikzpicture}
 \end{center}
 
+
+To sum ap, you can see in the sequence and star cases the need of
+having silent $\epsilon$-transitions. Similarly the alternative case
+shows the need of the NFA-nondeterminsim. It seems awkward to form the
+`alternative' composition of two DFAs, because DFA do not allow
+several starting and successor states. All these constructions can now
+be put together in the following recursive function:
+
+
 {\small\begin{lstlisting}[language=Scala,linebackgroundcolor=
                   {\ifodd\value{lstnumber}\color{capri!3}\fi}]
 def thompson (r: Rexp) : NFAt = r match {
@@ -764,11 +796,55 @@
 }
 \end{lstlisting}}
 
+\noindent
+It calculates a NFA from a regular expressions. At last we can run a
+NFA for the our evil regular expression examples.
+
 
 \begin{center}
 \begin{tabular}{@{\hspace{-1mm}}c@{\hspace{1mm}}c@{}}  
 \begin{tikzpicture}
 \begin{axis}[
+    title={Graph: $\texttt{a?\{n\}\,a{\{n\}}}$ and strings 
+           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,5,...,30},
+    xmax=33,
+    ymax=35,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=5.5cm,
+    height=4.5cm, 
+    legend entries={Python,Ruby},  
+    legend pos=south east,
+    legend cell align=left]
+  \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
+  \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
+  % breath-first search in NFAs
+  \addplot[red,mark=*, mark options={fill=white}] table {
+    1 0.00586
+    2 0.01209
+    3 0.03076
+    4 0.08269
+    5 0.12881
+    6 0.25146
+    7 0.51377
+    8 0.89079
+    9 1.62802
+    10 3.05326
+    11 5.92437
+    12 11.67863
+    13 24.00568
+  };
+\end{axis}
+\end{tikzpicture}
+&
+\begin{tikzpicture}
+\begin{axis}[
     title={Graph: $\texttt{(a*)*\,b}$ and strings 
            $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
     xlabel={$n$},
@@ -800,50 +876,10 @@
   };
 \end{axis}
 \end{tikzpicture}
-&
-\begin{tikzpicture}
-\begin{axis}[
-    title={Graph: $\texttt{a?\{n\}\,a{\{n\}}}$ and strings 
-           $\underbrace{\texttt{a}\ldots \texttt{a}}_{n}$},
-    xlabel={$n$},
-    x label style={at={(1.05,0.0)}},
-    ylabel={time in secs},
-    enlargelimits=false,
-    xtick={0,5,...,30},
-    xmax=33,
-    ymax=35,
-    ytick={0,5,...,30},
-    scaled ticks=false,
-    axis lines=left,
-    width=5.5cm,
-    height=4.5cm, 
-    legend entries={Python,Ruby},  
-    legend pos=south east,
-    legend cell align=left]
-  \addplot[blue,mark=*, mark options={fill=white}] table {re-python.data};
-  \addplot[brown,mark=triangle*, mark options={fill=white}] table {re-ruby.data};
-  % breath-first search in NFAs
-  \addplot[red,mark=*, mark options={fill=white}] table {
-    1  0.00741
-    2  0.02657
-    3  0.08317
-    4  0.22133
-    5  0.54463
-    6  1.42062
-    7  4.04926
-    8  12.70395
-    9  39.21555
-    10  112.05466
-  };
-\end{axis}
-\end{tikzpicture}
 \end{tabular}
 \end{center}
 
 
-\noindent This construction of a NFA from a regular expression
-was invented by Ken Thompson in 1968.
-
 
 \subsubsection*{Subset Construction}
 
diff -r 598741d39d21 -r e28d7a327870 progs/display/thompson2.scala
--- a/progs/display/thompson2.scala	Sun May 07 00:20:58 2017 +0100
+++ b/progs/display/thompson2.scala	Sun May 07 03:01:29 2017 +0100
@@ -24,13 +24,12 @@
 
 // alternative of two NFAs
 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = {
-  val Q = TState()
-  val new_delta : eNFAtrans = 
-    { case (Q, None) => enfa1.starts | enfa2.starts }
+  val new_delta : NFAtrans = { 
+    case (q, c) => applyOrElse(enfa1.delta, (q, c)) | 
+                   applyOrElse(enfa2.delta, (q, c)) }
   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)
 
-  eNFA(Set(Q), new_delta +++ enfa1.delta +++ enfa2.delta, 
-       new_fins)
+  NFA(enfa1.starts | enfa2.starts, new_delta, new_fins)
 }
 
 // star of a NFA
diff -r 598741d39d21 -r e28d7a327870 progs/nfa.scala
--- a/progs/nfa.scala	Sun May 07 00:20:58 2017 +0100
+++ b/progs/nfa.scala	Sun May 07 03:01:29 2017 +0100
@@ -85,4 +85,4 @@
 
 // subset constructions
 
-def subset(nfa: NFA[A, C]) : DFA[Set[A], C] =
+//def subset(nfa: NFA[A, C]) : DFA[Set[A], C] =
diff -r 598741d39d21 -r e28d7a327870 progs/thompson.scala
--- a/progs/thompson.scala	Sun May 07 00:20:58 2017 +0100
+++ b/progs/thompson.scala	Sun May 07 03:01:29 2017 +0100
@@ -60,12 +60,12 @@
 
 // alternative of two NFAs
 def NFA_ALT(enfa1: NFAt, enfa2: NFAt) : NFAt = {
-  val Q = TState()
-  val new_delta : eNFAtrans = 
-    { case (Q, None) => enfa1.starts | enfa2.starts }
+  val new_delta : NFAtrans = { 
+    case (q, c) =>  applyOrElse(enfa1.delta, (q, c)) | 
+                    applyOrElse(enfa2.delta, (q, c)) }
   val new_fins = (q: TState) => enfa1.fins(q) || enfa2.fins(q)
 
-  eNFA(Set(Q), new_delta +++ enfa1.delta +++ enfa2.delta, new_fins)
+  NFA(enfa1.starts | enfa2.starts, new_delta, new_fins)
 }
 
 // star of a NFA
@@ -124,10 +124,10 @@
 
 
 // the evil regular expression  a?{n} a{n}
-def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+def EVIL1(n: Int) : Rexp = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
 
 // the evil regular expression (a*)*b
-val EVIL2 = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
+val EVIL2 : Rexp = SEQ(STAR(STAR(CHAR('a'))), CHAR('b'))
 
 //for measuring time
 def time_needed[T](i: Int, code: => T) = {
@@ -140,11 +140,11 @@
 // the size of the NFA can be large, 
 // thus slowing down the breadth-first search
 
-for (i <- 1 to 10) {
+for (i <- 1 to 13) {
   println(i + ": " + "%.5f".format(time_needed(2, tmatches(EVIL1(i), "a" * i))))
 }
 
-for (i <- 1 to 10) {
+for (i <- 1 to 100 by 5) {
   println(i + " " + "%.5f".format(time_needed(2, tmatches(EVIL2, "a" * i))))
 }