afl-material: changeset 414:065ca01b62ae

--- a/data.sty	Mon Aug 22 09:14:22 2016 +0200
+++ b/data.sty	Mon Aug 22 23:05:43 2016 +0200
@@ -137,6 +137,18 @@
 951 31.96038
 \end{filecontents}
 
+\begin{filecontents}{re2c.data}
+1 0.00070
+501 0.02137
+1001 0.04070
+1501 0.08710
+2001 0.14441
+2501 0.21303
+3001 0.33987
+3501 0.43522
+4001 0.55873
+\end{filecontents}
+
 \begin{filecontents}{re3.data}
 1 0.001605
 501 0.131066
@@ -164,6 +176,21 @@
 11501 7.95864
 \end{filecontents}
 
+\begin{filecontents}{re3a.data}
+1 0.00006
+500001 0.12975
+1000001 0.38887
+1500001 0.61373
+2000001 0.78268
+2500001 0.98069
+3000001 1.41858
+3500001 1.68450
+4000001 1.84533
+4500001 1.91059
+5000001 1.98881
+5500001 2.66755
+6000001 3.16683
+\end{filecontents}
  
 \begin{filecontents}{nfa.data}
 0  0.00099

Binary file handouts/ho01.pdf has changed

Binary file handouts/ho02.pdf has changed

--- a/handouts/ho02.tex	Mon Aug 22 09:14:22 2016 +0200
+++ b/handouts/ho02.tex	Mon Aug 22 23:05:43 2016 +0200
@@ -24,10 +24,12 @@
 
 
 \begin{center}
+Plots: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$\\
 \begin{tabular}{@{}cc@{}}
 \begin{tikzpicture}
 \begin{axis}[
-    xlabel={\small $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
     ylabel={\small time in secs},
     enlargelimits=false,
     xtick={0,5,...,30},
@@ -50,7 +52,8 @@
 &
 \begin{tikzpicture}
   \begin{axis}[
-    xlabel={\small $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.1,0.05)}},
     ylabel={\small time in secs},
     enlargelimits=false,
     xtick={0,3000,...,12000},
@@ -69,10 +72,12 @@
 \end{center}
 
 \begin{center}
+Plots: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$
 \begin{tabular}{@{}cc@{}}
 \begin{tikzpicture}
 \begin{axis}[
-    xlabel={$(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
     ylabel={time in secs},
     enlargelimits=false,
     xtick={0,5,...,30},
@@ -92,7 +97,8 @@
 &
 \begin{tikzpicture}
   \begin{axis}[
-    xlabel={$(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$}, 
+    xlabel={$n$},
+    x label style={at={(1.1,0.05)}},
     ylabel={time in secs},
     enlargelimits=false,
     xtick={0,3000,...,12000},
@@ -129,7 +135,7 @@
 In such cases there is no way we can implement an exhaustive test for
 whether a string is member of this set or not. In contrast our
 matching algorithm will operate on the regular expression $r$ and
-string $s$, only, which are both finite objects. Before we explain to
+string $s$, only, which are both finite objects. Before we explain 
 the matching algorithm, however, let us have a closer look at what it
 means when two regular expressions are equivalent.
 
@@ -293,19 +299,19 @@
 
 \begin{center}
 \begin{tabular}{l@ {\hspace{2mm}}c@ {\hspace{2mm}}l}
-  $der\, c\, (\ZERO)$      & $\dn$ & $\ZERO$\\
-  $der\, c\, (\ONE)$         & $\dn$ & $\ZERO$ \\
-  $der\, c\, (d)$                & $\dn$ & if $c = d$ then $\ONE$ else $\ZERO$\\
-  $der\, c\, (r_1 + r_2)$        & $\dn$ & $der\, c\, r_1 + der\, c\, r_2$\\
-  $der\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $\textit{nullable} (r_1)$\\
-  & & then $(der\,c\,r_1) \cdot r_2 + der\, c\, r_2$\\ 
-  & & else $(der\, c\, r_1) \cdot r_2$\\
-  $der\, c\, (r^*)$          & $\dn$ & $(der\,c\,r) \cdot (r^*)$
+  $\textit{der}\, c\, (\ZERO)$      & $\dn$ & $\ZERO$\\
+  $\textit{der}\, c\, (\ONE)$         & $\dn$ & $\ZERO$ \\
+  $\textit{der}\, c\, (d)$                & $\dn$ & if $c = d$ then $\ONE$ else $\ZERO$\\
+  $\textit{der}\, c\, (r_1 + r_2)$        & $\dn$ & $\textit{der}\, c\, r_1 + \textit{der}\, c\, r_2$\\
+  $\textit{der}\, c\, (r_1 \cdot r_2)$  & $\dn$  & if $\textit{nullable} (r_1)$\\
+  & & then $(\textit{der}\,c\,r_1) \cdot r_2 + \textit{der}\, c\, r_2$\\ 
+  & & else $(\textit{der}\, c\, r_1) \cdot r_2$\\
+  $\textit{der}\, c\, (r^*)$          & $\dn$ & $(\textit{der}\,c\,r) \cdot (r^*)$
   \end{tabular}
 \end{center}
 
 \noindent The first two clauses can be rationalised as
-follows: recall that $der$ should calculate a regular
+follows: recall that $\textit{der}$ should calculate a regular
 expression so that given the ``input'' regular expression can
 match a string of the form $c\!::\!s$, we want a regular
 expression for $s$. Since neither $\ZERO$ nor $\ONE$
@@ -320,32 +326,33 @@
 involved. Fortunately, the $+$-case is still relatively
 straightforward: all strings of the form $c\!::\!s$ are either
 matched by the regular expression $r_1$ or $r_2$. So we just
-have to recursively call $der$ with these two regular
+have to recursively call $\textit{der}$ with these two regular
 expressions and compose the results again with $+$. Makes
 sense? 
 
 The $\cdot$-case is more complicated: if $r_1\cdot r_2$
 matches a string of the form $c\!::\!s$, then the first part
 must be matched by $r_1$. Consequently, it makes sense to
-construct the regular expression for $s$ by calling $der$ with
+construct the regular expression for $s$ by calling $\textit{der}$ with
 $r_1$ and ``appending'' $r_2$. There is however one exception
 to this simple rule: if $r_1$ can match the empty string, then
 all of $c\!::\!s$ is matched by $r_2$. So in case $r_1$ is
 nullable (that is can match the empty string) we have to allow
-the choice $der\,c\,r_2$ for calculating the regular
+the choice $\textit{der}\,c\,r_2$ for calculating the regular
 expression that can match $s$. Therefore we have to add the
-regular expression $der\,c\,r_2$ in the result. The $*$-case
+regular expression $\textit{der}\,c\,r_2$ in the result. The $*$-case
 is again simple: if $r^*$ matches a string of the form
 $c\!::\!s$, then the first part must be ``matched'' by a
-single copy of $r$. Therefore we call recursively $der\,c\,r$
-and ``append'' $r^*$ in order to match the rest of $s$.
+single copy of $r$. Therefore we call recursively $\textit{der}\,c\,r$
+and ``append'' $r^*$ in order to match the rest of $s$. Still
+makes sense?
 
-If this did not make sense yet, here is another way to rationalise
-the definition of $der$ by considering the following operation
+If all this did not make sense yet, here is another way to rationalise
+the definition of $\textit{der}$ by considering the following operation
 on sets:
 
 \begin{equation}\label{Der}
-Der\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
+\textit{Der}\,c\,A\;\dn\;\{s\,|\,c\!::\!s \in A\}
 \end{equation}
 
 \noindent This operation essentially transforms a set of
@@ -353,37 +360,37 @@
 with $c$ and then strips off the $c$ from all the remaining
 strings. For example suppose $A = \{f\!oo, bar, f\!rak\}$ then
 
-\[ Der\,f\,A = \{oo, rak\}\quad,\quad 
-   Der\,b\,A = \{ar\} \quad \text{and} \quad 
-   Der\,a\,A = \{\} 
+\[ \textit{Der}\,f\,A = \{oo, rak\}\quad,\quad 
+   \textit{Der}\,b\,A = \{ar\} \quad \text{and} \quad 
+   \textit{Der}\,a\,A = \{\} 
 \]
  
 \noindent
-Note that in the last case $Der$ is empty, because no string in $A$
+Note that in the last case $\textit{Der}$ is empty, because no string in $A$
 starts with $a$. With this operation we can state the following
-property about $der$:
+property about $\textit{der}$:
 
 \[
-L(der\,c\,r) = Der\,c\,(L(r))
+L(\textit{der}\,c\,r) = \textit{Der}\,c\,(L(r))
 \]
 
 \noindent
-This property clarifies what regular expression $der$ calculates,
+This property clarifies what regular expression $\textit{der}$ calculates,
 namely take the set of strings that $r$ can match (that is $L(r)$),
 filter out all strings not starting with $c$ and strip off the $c$
 from the remaining strings---this is exactly the language that
-$der\,c\,r$ can match.
+$\textit{der}\,c\,r$ can match.
 
 If we want to find out whether the string $abc$ is matched by
-the regular expression $r_1$ then we can iteratively apply $der$
+the regular expression $r_1$ then we can iteratively apply $\textit{der}$
 as follows
 
 \begin{center}
 \begin{tabular}{rll}
 Input: $r_1$, $abc$\medskip\\
-Step 1: & build derivative of $a$ and $r_1$ & $(r_2 = der\,a\,r_1)$\smallskip\\
-Step 2: & build derivative of $b$ and $r_2$ & $(r_3 = der\,b\,r_2)$\smallskip\\
-Step 3: & build derivative of $c$ and $r_3$ & $(r_4 = der\,b\,r_3)$\smallskip\\
+Step 1: & build derivative of $a$ and $r_1$ & $(r_2 = \textit{der}\,a\,r_1)$\smallskip\\
+Step 2: & build derivative of $b$ and $r_2$ & $(r_3 = \textit{der}\,b\,r_2)$\smallskip\\
+Step 3: & build derivative of $c$ and $r_3$ & $(r_4 = \textit{der}\,b\,r_3)$\smallskip\\
 Step 4: & the string is exhausted; test & ($\textit{nullable}(r_4)$)\\
         & whether $r_4$ can recognise the\\
         & empty string\smallskip\\
@@ -391,50 +398,50 @@
 \end{tabular}
 \end{center}
 
-\noindent Again the operation $Der$ might help to rationalise
+\noindent Again the operation $\textit{Der}$ might help to rationalise
 this algorithm. We want to know whether $abc \in L(r_1)$. We
-do not know yet---but let us assume it is. Then $Der\,a\,L(r_1)$
+do not know yet---but let us assume it is. Then $\textit{Der}\,a\,L(r_1)$
 builds the set where all the strings not starting with $a$ are
 filtered out. Of the remaining strings, the $a$ is stripped
 off. So we should still have $bc$ in the set.
 Then we continue with filtering out all strings not
 starting with $b$ and stripping off the $b$ from the remaining
-strings, that means we build $Der\,b\,(Der\,a\,(L(r_1)))$.
+strings, that means we build $\textit{Der}\,b\,(\textit{Der}\,a\,(L(r_1)))$.
 Finally we filter out all strings not starting with $c$ and
 strip off $c$ from the remaining string. This is
-$Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$. Now if $abc$ was in the 
-original set ($L(r_1)$), then in $Der\,c\,(Der\,b\,(Der\,a\,(L(r))))$ 
+$\textit{Der}\,c\,(\textit{Der}\,b\,(\textit{Der}\,a\,(L(r_1))))$. Now if $abc$ was in the 
+original set ($L(r_1)$), then $\textit{Der}\,c\,(\textit{Der}\,b\,(\textit{Der}\,a\,(L(r_1))))$ 
 must contain the empty string. If not, then $abc$ was not in the 
 language we started with.
 
-Our matching algorithm using $der$ and $\textit{nullable}$ works
-similarly, just using regular expression instead of sets. For
-this we need to extend the notion of derivatives from single
+Our matching algorithm using $\textit{der}$ and $\textit{nullable}$ works
+similarly, just using regular expression instead of sets. In order to
+define our algorithm we need to extend the notion of derivatives from single
 characters to strings. This can be done using the following
-function, taking a string and regular expression as input and
+function, taking a string and a regular expression as input and
 a regular expression as output.
 
 \begin{center}
 \begin{tabular}{@ {}l@ {\hspace{2mm}}c@ {\hspace{2mm}}l@ {\hspace{-10mm}}l@ {}}
   $\textit{ders}\, []\, r$     & $\dn$ & $r$ & \\
-  $\textit{ders}\, (c\!::\!s)\, r$ & $\dn$ & $\textit{ders}\,s\,(der\,c\,r)$ & \\
+  $\textit{ders}\, (c\!::\!s)\, r$ & $\dn$ & $\textit{ders}\,s\,(\textit{der}\,c\,r)$ & \\
   \end{tabular}
 \end{center}
 
-\noindent This function iterates $der$ taking one character at
+\noindent This function iterates $\textit{der}$ taking one character at
 the time from the original string until it is exhausted.
-Having $ders$ in place, we can finally define our matching
+Having $\textit{der}s$ in place, we can finally define our matching
 algorithm:
 
 \[
-matches\,s\,r \dn \textit{nullable}(ders\,s\,r)
+\textit{matches}\,s\,r \dn \textit{nullable}(\textit{ders}\,s\,r)
 \]
 
 \noindent
 and we can claim that
 
 \[
-matches\,s\,r\quad\text{if and only if}\quad s\in L(r)
+\textit{matches}\,s\,r\quad\text{if and only if}\quad s\in L(r)
 \]
 
 \noindent holds, which means our algorithm satisfies the
@@ -442,11 +449,12 @@
 whether the claim holds any water is a different question,
 which for example is the point of the Strand-2 Coursework.
 
-This algorithm was introduced by Janus Brzozowski in 1964. Its
+This algorithm was introduced by Janus Brzozowski in 1964, but 
+is more widely known only in the last 10 or so years. Its
 main attractions are simplicity and being fast, as well as
 being easily extendable for other regular expressions such as
 $r^{\{n\}}$, $r^?$, $\sim{}r$ and so on (this is subject of
-Strand-1 Coursework 1).
+Strand-1 Coursework 1). 
 
 \subsection*{The Matching Algorithm in Scala}
 
@@ -465,22 +473,65 @@
   definitions very closely.\label{scala1}}
 \end{figure}
 
-For running the algorithm with our favourite example, the evil
+
+Remember our second example involving the regular expression
+$(a^*)^* \cdot b$ which could not match strings of $n$ \texttt{a}s. 
+Java needed around 30 seconds to find this out a string with $n=28$.
+It seems our algorithm is doing rather well in comparison:
+
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[
+    title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    xtick={0,500,...,4000},
+    xmax=4100,
+    ytick={0,5,...,30},
+    scaled ticks=false,
+    axis lines=left,
+    width=10cm,
+    height=4.5cm, 
+    legend entries={Java,Scala V1},  
+    legend pos=north east,
+    legend cell align=left]
+\addplot[blue,mark=*, mark options={fill=white}] 
+  table {re-java.data};
+\addplot[red,mark=triangle*,mark options={fill=white}]
+  table {re2c.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\noindent
+This is no error: it hardly takes more than half a second for
+strings up to the length of 4000. After that we receive a
+StackOverflow exception, but still\ldots
+
+For running the algorithm with our first example, the evil
 regular expression $a^?{}^{\{n\}}a^{\{n\}}$, we need to implement
 the optional regular expression and the exactly $n$-times
 regular expression. This can be done with the translations
 
 \lstinputlisting[numbers=none]{../progs/app51.scala}
 
-\noindent Running the matcher with the example, we find it is
+\noindent Running the matcher with this example, we find it is
 slightly worse then the matcher in Ruby and Python.
 Ooops\ldots
 
 \begin{center}
-\begin{tikzpicture}
\begin{axis}[
    xlabel={\pcode{a}s},
    ylabel={time in secs},
+\begin{tikzpicture}
+\begin{axis}[    
+    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.05,0.0)}},
+    ylabel={time in secs},
     enlargelimits=false,
     xtick={0,5,...,30},
-    xmax=30,
    ytick={0,5,...,30},
+    xmax=30,
+    ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
     width=6cm,
@@ -493,7 +544,9 @@
 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   table {re-ruby.data};  
 \addplot[red,mark=triangle*,mark options={fill=white}] 
-  table {re1.data};  
\end{axis}
\end{tikzpicture}
+  table {re1.data};  
+\end{axis}
+\end{tikzpicture}
 \end{center}
 
 \noindent Analysing this failure we notice that for
@@ -525,41 +578,50 @@
 \noindent With this fix we have a constant ``size'' regular
 expression for our running example no matter how large $n$ is.
 This means we have to also add cases for \pcode{NTIMES} in the
-functions $\textit{nullable}$ and $der$. Does the change have any
+functions $\textit{nullable}$ and $\textit{der}$. Does the change have any
 effect?
 
 \begin{center}
-\begin{tikzpicture}
\begin{axis}[
    xlabel={\pcode{a}s},
    ylabel={time in secs},
+\begin{tikzpicture}
+\begin{axis}[
+    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.01,0.0)}},
+    ylabel={time in secs},
     enlargelimits=false,
     xtick={0,100,...,1000},
-    xmax=1000,
    ytick={0,5,...,30},
+    xmax=1100,
+    ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
-    width=9.5cm,
+    width=10cm,
     height=5cm, 
     legend entries={Python,Ruby,Scala V1,Scala V2},  
     legend pos=outer north east,
-    legend cell align=left  
]
+    legend cell align=left]
 \addplot[blue,mark=*, mark options={fill=white}] 
   table {re-python.data};
 \addplot[brown,mark=pentagon*, mark options={fill=white}] 
   table {re-ruby.data};  
 \addplot[red,mark=triangle*,mark options={fill=white}] 
-  table {re1.data};  
\addplot[green,mark=square*,mark options={fill=white}] 
-  table {re2b.data};
\end{axis}
\end{tikzpicture}
+  table {re1.data};  
+\addplot[green,mark=square*,mark options={fill=white}] 
+  table {re2b.data};
+\end{axis}
+\end{tikzpicture}
 \end{center}
 
 \noindent Now we are talking business! The modified matcher 
 can within 30 seconds handle regular expressions up to
-$n = 950$ before a StackOverflow is raised. Python and Ruby 
-(and our first version) could only handle $n = 27$ or so in 30 
-second.
+$n = 950$ before a StackOverflow is raised. Recall that Python and Ruby 
+(and our first version, Scala V1) could only handle $n = 27$ or so in 30 
+seconds. There is no change for our second example 
+$(a^*)^* \cdot b$---so this is still good.
 
-SECOND EXAMPLE
 
 The moral is that our algorithm is rather sensitive to the
 size of regular expressions it needs to handle. This is of
-course obvious because both $\textit{nullable}$ and $der$ frequently
+course obvious because both $\textit{nullable}$ and $\textit{der}$ frequently
 need to traverse the whole regular expression. There seems,
 however, one more issue for making the algorithm run faster.
 The derivative function often produces ``useless''
@@ -568,9 +630,9 @@
 
 \begin{center}
 \begin{tabular}{l}
-$der\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$\\
-$der\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$\\
-$der\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$
+$\textit{der}\,a\,r = ((\ONE \cdot b) + \ZERO) \cdot r$\\
+$\textit{der}\,b\,r = ((\ZERO \cdot b) + \ONE)\cdot r$\\
+$\textit{der}\,c\,r = ((\ZERO \cdot b) + \ZERO)\cdot r$
 \end{tabular}
 \end{center}
 
@@ -581,9 +643,9 @@
 
 \begin{center}
 \begin{tabular}{l}
-$der\,a\,r \equiv b \cdot r$\\
-$der\,b\,r \equiv r$\\
-$der\,c\,r \equiv \ZERO$
+$\textit{der}\,a\,r \equiv b \cdot r$\\
+$\textit{der}\,b\,r \equiv r$\\
+$\textit{der}\,c\,r \equiv \ZERO$
 \end{tabular}
 \end{center}
 
@@ -613,10 +675,14 @@
 
 \begin{center}
 \begin{tikzpicture}
-\begin{axis}[xlabel={\pcode{a}s},ylabel={time in secs},
+\begin{axis}[
+    title={Plot: $a^{?\{n\}} \cdot a^{\{n\}}$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.04,0.0)}},
+    ylabel={time in secs},
     enlargelimits=false,
     xtick={0,2000,...,12000},
-    xmax=12000,
+    xmax=13000,
     ytick={0,5,...,30},
     scaled ticks=false,
     axis lines=left,
@@ -629,7 +695,32 @@
 \end{tikzpicture}
 \end{center}
 
-SECOND EXAMPLE
+\begin{center}
+\begin{tikzpicture}
+\begin{axis}[
+    title={Plot: $(a^*)^* \cdot b$ and strings $\underbrace{a\ldots a}_{n}$},
+    xlabel={$n$},
+    x label style={at={(1.09,0.0)}},
+    ylabel={time in secs},
+    enlargelimits=false,
+    %xtick={0,1e+6,...,6e+6},
+    xmax=6200000,
+    ytick={0,5,...,30},
+    ymax=33,
+    scaled ticks=false,
+    axis lines=left,
+    width=9cm,
+    height=5cm, 
+    legend entries={Scala V3}]
+\addplot[black,mark=square*,mark options={fill=white}] table {re3a.data};
+\end{axis}
+\end{tikzpicture}
+\end{center}
+
+\section{Epilogue}
+
+TBD
+
 
 \section*{Proofs}
 
@@ -771,23 +862,23 @@
 Given this recipe, I let you show
 
 \begin{equation}
-Ders\,s\,(L(r)) = L(ders\,s\,r)
+\textit{Ders}\,s\,(L(r)) = L(\textit{ders}\,s\,r)
 \label{dersprop}
 \end{equation}
 
-\noindent by induction on $s$. Recall $Der$ is defined for 
-character---see \eqref{Der}; $Ders$ is similar, but for strings:
+\noindent by induction on $s$. Recall $\textit{Der}$ is defined for 
+character---see \eqref{Der}; $\textit{Ders}$ is similar, but for strings:
 
 \[
-Ders\,s\,A\;\dn\;\{s'\,|\,s @ s' \in A\}
+\textit{Ders}\,s\,A\;\dn\;\{s'\,|\,s @ s' \in A\}
 \]
 
 \noindent In this proof you can assume the following property
-for $der$ and $Der$ has already been proved, that is you can
+for $der$ and $\textit{Der}$ has already been proved, that is you can
 assume
 
 \[
-L(der\,c\,r) = Der\,c\,(L(r))
+L(\textit{der}\,c\,r) = \textit{Der}\,c\,(L(r))
 \]
 
 \noindent holds (this would be of course a property that
@@ -806,16 +897,16 @@
 following problem
 
 \begin{equation}
-[] \in Ders\,s\,(L(r))
+[] \in \textit{Ders}\,s\,(L(r))
 \label{dersstep}
 \end{equation}
 
 \noindent But we have shown above in \eqref{dersprop}, that
-the $Ders$ can be replaced by $L(ders\ldots)$. That means 
+the $\textit{Ders}$ can be replaced by $L(\textit{ders}\ldots)$. That means 
 \eqref{dersstep} is equivalent to 
 
 \begin{equation}
-[] \in L(ders\,s\,r)
+[] \in L(\textit{ders}\,s\,r)
 \label{prefinalstep}
 \end{equation}
 
@@ -825,13 +916,13 @@
 \eqref{prefinalstep} is equivalent with
  
 \[
-\textit{nullable}(ders\,s\,r)
+\textit{nullable}(\textit{ders}\,s\,r)
 \] 
 
 \noindent But this is just the definition of $matches$
 
 \[
-matches\,s\,r \dn nullable(ders\,s\,r)
+matches\,s\,r \dn nullable(\textit{ders}\,s\,r)
 \]
 
 \noindent In effect we have shown

--- a/progs/re2.scala	Mon Aug 22 09:14:22 2016 +0200
+++ b/progs/re2.scala	Mon Aug 22 23:05:43 2016 +0200
@@ -41,7 +41,8 @@
 //optional: one or zero times
 def OPT(r: Rexp) = ALT(r, EMPTY)
 
-def EVIL(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+def EVIL1(n: Int) = SEQ(NTIMES(OPT(CHAR('a')), n), NTIMES(CHAR('a'), n))
+val EVIL2 = SEQ(STAR(CHAR('a')), CHAR('b'))
 
 def time_needed[T](i: Int, code: => T) = {
   val start = System.nanoTime()
@@ -51,12 +52,17 @@
 }
 
 //for (i <- 1 to 100) {
-//  println(i + ": " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
+//  println(i + ": " + "%.5f".format(time_needed(1, matches(EVIL1(i), "a" * i))))
 //}
 
 //a bit bolder test
 for (i <- 1 to 1000 by 50) {
-  println(i + " " + "%.5f".format(time_needed(1, matches(EVIL(i), "a" * i))))
+  println(i + " " + "%.5f".format(time_needed(1, matches(EVIL1(i), "a" * i))))
 }
 
 
+for (i <- 1 to 4002 by 500) {
+  println(i + " " + "%.5f".format(time_needed(4, matches(EVIL2, "a" * i))))
+}
+
+

--- a/progs/re4.scala	Mon Aug 22 09:14:22 2016 +0200
+++ b/progs/re4.scala	Mon Aug 22 23:05:43 2016 +0200
@@ -85,18 +85,9 @@
 }
 
 
-def ders2 (s: List[Char], r: Rexp) : Rexp = (s, r) match {
-  case (Nil, r) => r
-  case (s, ZERO) => ZERO
-  case (s, ONE) => if (s == Nil) ONE else ZERO
-  case (s, CHAR(c)) => if (s == List(c)) ONE else 
-                       if (s == Nil) CHAR(c) else ZERO
-  case (s, ALT(r1, r2)) => ALT(ders2(s, r2), ders2(s, r2))
-  case (c::s, r) => ders2(s, der(c, r).simp)
-}
 
 // main matcher function
-def matcher(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r))
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders(s.toList, r))
 
 
 //one or zero
@@ -119,7 +110,7 @@
 //  println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i))))
 //}
 
-for (i <- 1 to 6000000 by 500000) {
+for (i <- 1 to 6000001 by 500000) {
   println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL2, "a" * i))))
 }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/progs/re5.scala	Mon Aug 22 23:05:43 2016 +0200
@@ -0,0 +1,118 @@
+import scala.annotation.tailrec    
+import scala.language.implicitConversions
+    
+abstract class Rexp {
+  def simp : Rexp = this
+}
+
+case object ZERO extends Rexp
+case object ONE extends Rexp
+case class CHAR(c: Char) extends Rexp
+case class ALT(r1: Rexp, r2: Rexp) extends Rexp {
+  override def simp = (r1.simp, r2.simp) match {
+    case (ZERO, r) => r
+    case (r, ZERO) => r
+    case (r, ONE) => if (nullable(r)) r else ALT(r, ONE)
+    case (ONE, r) => if (nullable(r)) r else ALT(r, ONE)
+    case (r1, r2) => if (r1 == r2) r1 else ALT(r1, r2)
+  }
+}
+case class SEQ(r1: Rexp, r2: Rexp) extends Rexp {
+  override def simp = (r1.simp, r2.simp) match {
+    case (ZERO, _) => ZERO
+    case (_, ZERO) => ZERO
+    case (ONE, r) => r
+    case (r, ONE) => r
+    case (r1, r2) => SEQ(r1, r2)
+  }
+}
+case class STAR(r: Rexp) extends Rexp {
+  override def simp = r.simp match {
+    case ZERO => ONE
+    case ONE => ONE
+    case r => STAR(r)
+  }
+}
+case class NTIMES(r: Rexp, n: Int) extends Rexp {
+  override def simp = if (n == 0) ONE else 
+    r.simp match {
+      case ZERO => ZERO
+      case ONE => ONE
+      case r => NTIMES(r, n)
+    }
+}
+
+// some convenience for typing in regular expressions
+def charlist2rexp(s : List[Char]) : Rexp = s match {
+  case Nil => ONE
+  case c::Nil => CHAR(c)
+  case c::s => SEQ(CHAR(c), charlist2rexp(s))
+}
+implicit def string2rexp(s : String) : Rexp = charlist2rexp(s.toList)
+
+
+// nullable function: tests whether the regular 
+// expression can recognise the empty string
+def nullable (r: Rexp) : Boolean = r match {
+  case ZERO => false
+  case ONE => true
+  case CHAR(_) => false
+  case ALT(r1, r2) => nullable(r1) || nullable(r2)
+  case SEQ(r1, r2) => nullable(r1) && nullable(r2)
+  case STAR(_) => true
+  case NTIMES(r, i) => if (i == 0) true else nullable(r)
+}
+
+// derivative of a regular expression w.r.t. a character
+def der (c: Char, r: Rexp) : Rexp = r match {
+  case ZERO => ZERO
+  case ONE => ZERO
+  case CHAR(d) => if (c == d) ONE else ZERO
+  case ALT(r1, r2) => ALT(der(c, r1), der(c, r2))
+  case SEQ(r1, r2) => 
+    if (nullable(r1)) ALT(SEQ(der(c, r1), r2), der(c, r2))
+    else SEQ(der(c, r1), r2)
+  case STAR(r) => SEQ(der(c, r), STAR(r))
+  case NTIMES(r, i) => 
+    if (i == 0) ZERO else SEQ(der(c, r), NTIMES(r, i - 1))
+}
+
+// derivative w.r.t. a string (iterates der)
+def ders2(s: List[Char], r: Rexp) : Rexp = (s, r) match {
+  case (Nil, r) => r
+  case (s, ZERO) => ZERO
+  case (s, ONE) => if (s == Nil) ONE else ZERO
+  case (s, CHAR(c)) => if (s == List(c)) ONE else 
+                       if (s == Nil) CHAR(c) else ZERO
+  case (s, ALT(r1, r2)) => ALT(ders2(s, r2), ders2(s, r2))
+  case (c::s, r) => ders2(s, der(c, r).simp)
+}
+
+// main matcher function
+def matcher(r: Rexp, s: String) : Boolean = nullable(ders2(s.toList, r))
+
+
+//one or zero
+def OPT(r: Rexp) = ALT(r, ONE)
+
+def EVIL1(n: Int) = SEQ(NTIMES(OPT("a"), n), NTIMES("a", n))
+val EVIL2 = SEQ(STAR("a"), "b")
+
+def time_needed[T](i: Int, code: => T) = {
+  val start = System.nanoTime()
+  for (j <- 1 to i) code
+  val end = System.nanoTime()
+  (end - start)/(i * 1.0e9)
+}
+
+val i = 5000
+println(i + " " + "%.5f".format(time_needed(10, matcher(EVIL1(i), "a" * i))))
+
+for (i <- 1 to 80000 by 10000) {
+  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL1(i), "a" * i))))
+}
+
+for (i <- 1 to 10000001 by 1000000) {
+  println(i + " " + "%.5f".format(time_needed(2, matcher(EVIL2, "a" * i))))
+}
+

author	Christian Urban <urbanc@in.tum.de>
	Mon, 22 Aug 2016 23:05:43 +0200
changeset 414	065ca01b62ae
parent 413	b7221df9662a
child 415	4ae59fd3b174

data.sty		file \| annotate \| diff \| comparison \| revisions
handouts/ho01.pdf		file \| annotate \| diff \| comparison \| revisions
handouts/ho02.pdf		file \| annotate \| diff \| comparison \| revisions
handouts/ho02.tex		file \| annotate \| diff \| comparison \| revisions
progs/re2.scala		file \| annotate \| diff \| comparison \| revisions
progs/re4.scala		file \| annotate \| diff \| comparison \| revisions
progs/re5.scala		file \| annotate \| diff \| comparison \| revisions